Advances dg/dt = NL(g, chi) from t to t + dt, starting from the current g_state and chi, using a Runge-Kutta scheme with an embedded error estimate for step-size control.
| Type | Intent | Optional | Attributes | Name |
|---|---|---|---|---|
| complex | intent(inout) | | dimension (-ntgrid:, :, g_lo%llim_proc:) | g_state |
| complex | intent(inout) | | dimension (-ntgrid:,:,:) | phinew |
| complex | intent(inout) | | dimension (-ntgrid:,:,:) | aparnew |
| complex | intent(inout) | | dimension (-ntgrid:,:,:) | bparnew |
| real | intent(in) | | | dt |
| procedure(invert_field_func) | | | | fields_func |
| procedure(source_term_func) | | | | source_func |
| type(rk_scheme_type) | intent(in) | | | scheme |
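For orientation, the step-size control implemented below follows the standard embedded Runge-Kutta pattern: the same stage evaluations are combined with two sets of weights (high and low order), their difference gives a local error estimate, and the next step size is rescaled from it, capped by the CFL limit and multiplied by a safety factor. Schematically (a sketch only; the symbols are illustrative rather than the exact variable names used in the code):

$$
\mathrm{err} = \frac{\Delta t}{2}\,\max\left|\sum_s \left(b^{\mathrm{high}}_s - b^{\mathrm{low}}_s\right) k_s\right|, \qquad
\Delta t_{\mathrm{new}} = f_{\mathrm{safety}}\,\min\!\left(\Delta t\,\varepsilon^{-1/p},\ \Delta t_{\mathrm{CFL}}\right)
$$

where the \(k_s\) are the stage values, \(\varepsilon\) is the error normalised against the solution magnitude and tolerance (the code's `inverse_error` is \(1/\varepsilon\)), and \(p\) is the scheme order. A step is rejected and retried with the smaller step whenever \(\varepsilon > 1\), or, when `immediate_reset` is set, whenever the CFL limit forced the step to shrink.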
subroutine advance_nonlinear_term_rk_implementation(g_state, phinew, &
aparnew, bparnew, dt, fields_func, source_func, scheme)
use theta_grid, only: ntgrid
use gs2_time, only: code_dt_min
use mp, only: mp_abort, proc0, max_allreduce
use gs2_layouts, only: g_lo
use rk_schemes, only: rk_scheme_type
use run_parameters, only: immediate_reset
use array_utils, only: copy, gs2_max_abs, zero_array
use warning_helpers, only: exactly_equal
implicit none
complex, dimension (-ntgrid:, :, g_lo%llim_proc:), intent(in out) :: g_state
complex, dimension (-ntgrid:,:,:), intent(in out) :: phinew, aparnew, bparnew
real, intent(in) :: dt
procedure(invert_field_func) :: fields_func
procedure(source_term_func) :: source_func
type(rk_scheme_type), intent(in) :: scheme
complex, dimension(:, :, :), allocatable :: g_cur, gnl_1
complex, dimension(:, :, :, :), allocatable :: stages
real, dimension(:), allocatable :: solution_coeffs, error_coeffs, stage_coeffs
real, dimension(3) :: errors
real :: internal_time, time_step, max_vel, inverse_error
real :: new_time_step, cfl_time_step, half_time
real, save :: time_step_last = -1
integer :: counter, nstages
integer :: istage, istage_sub, iglo
logical :: cfl_limited
logical, parameter :: debug = .false.
! Initialise internal variables
if (time_step_last < 0) time_step_last = dt
internal_time = 0.0
counter = 1
time_step = time_step_last
! Copy the relevant coefficients for forming the solution to reduce
! later duplication.
if (scheme%follow_high_order) then
allocate(solution_coeffs, source = scheme%high_order_coeffs)
else
allocate(solution_coeffs, source = scheme%lower_order_coeffs)
end if
! Get the error coefficients
allocate(error_coeffs, source = scheme%high_order_coeffs - scheme%lower_order_coeffs)
nstages = scheme%number_of_stages
allocate(stages(-ntgrid:ntgrid, 2, g_lo%llim_proc:g_lo%ulim_alloc, nstages))
allocate(gnl_1(-ntgrid:ntgrid, 2, g_lo%llim_proc:g_lo%ulim_alloc))
allocate(g_cur(-ntgrid:ntgrid, 2, g_lo%llim_proc:g_lo%ulim_alloc))
call copy(g_state, g_cur) ; call zero_array(stages) ; call zero_array(gnl_1)
! Keep taking explicit steps until we have advanced by dt
do while (internal_time < dt)
if(proc0.and.debug) print*,'Taking step with size',time_step,'at time',internal_time
half_time = 0.5 * time_step
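! Loop over the stages: rebuild the stage input from g_cur plus the
! weighted contributions of all earlier stages (the scheme's tableau
! coefficients), update the fields consistently, then evaluate the
! nonlinear source term for this stage into stages(:, :, :, istage).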
do istage = 1, nstages
call copy(g_cur, g_state)
stage_coeffs = half_time * scheme%coeffs(:, istage)
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(istage_sub, iglo) &
!$OMP SHARED(g_lo, g_state, half_time, scheme, stages, istage, stage_coeffs) &
!$OMP COLLAPSE(2) &
!$OMP SCHEDULE(static)
do istage_sub = 1, istage-1
do iglo = g_lo%llim_proc, g_lo%ulim_proc
g_state(:, :, iglo) = g_state(:, :, iglo) + stage_coeffs(istage_sub) * stages(:, :, iglo, istage_sub)
end do
end do
!$OMP END PARALLEL DO
call calculate_fields(fields_func, g_state, phinew, aparnew, bparnew)
! Get the current nonlinear source term
call source_func(g_state, stages(:, :, :, istage), &
phinew, aparnew, bparnew, &
max_vel, need_to_adjust = .not. advance_nonadiabatic_dfn, &
calculate_cfl_limit = (istage == 1))
nrhs_calls = nrhs_calls + 1
if (istage == 1) errors(3) = max_vel
end do
! The error can be formed directly as error =
! time_step * 0.5 * sum(stages_i * (high_order_coeffs_i -
! low_order_coeffs_i)). Here we store this sum in gnl_1
! without the time_step * 0.5 factor (to avoid extra
! array operations) and multiply the final error by
! time_step/2 afterwards for full consistency.
! Could we merge this OpenMP parallel region with the
! other loop directly below to avoid some overhead?
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(iglo) &
!$OMP SHARED(g_lo, gnl_1, error_coeffs, stages) &
!$OMP SCHEDULE(static)
do iglo = g_lo%llim_proc, g_lo%ulim_proc
gnl_1(:, :, iglo) = error_coeffs(1) * stages(:, :, iglo, 1)
end do
!$OMP END PARALLEL DO
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(iglo, istage) &
!$OMP SHARED(g_lo, gnl_1, error_coeffs, nstages, stages) &
!$OMP COLLAPSE(2) &
!$OMP SCHEDULE(static)
do istage = 2, nstages
do iglo = g_lo%llim_proc, g_lo%ulim_proc
gnl_1(:, :, iglo) = gnl_1(:, :, iglo) + error_coeffs(istage) * stages(:, :, iglo, istage)
end do
end do
!$OMP END PARALLEL DO
! Store the absolute error
errors(1) = gs2_max_abs(gnl_1) * half_time
! Estimate the absolute size of the solution using
! the old solution
errors(2) = gs2_max_abs(g_cur)
call max_allreduce(errors)
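! Combine the error estimates into a single inverse error measure;
! values below one indicate the estimated error exceeds the tolerance
! and the step is rejected below.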
inverse_error = get_inverse_error(errors)
cfl_time_step = 1.0 / errors(3)
! Calculate the new time step
new_time_step = min(time_step * (inverse_error)**(1.0/scheme%order), cfl_time_step)
cfl_limited = new_time_step < time_step .and. exactly_equal(new_time_step, cfl_time_step)
new_time_step = new_time_step * time_step_safety_factor
if(proc0 .and. debug) print*,'Inverse error is ',inverse_error,'at internal_time', &
internal_time,'with step',time_step,'new time step is',new_time_step, &
'counter=',counter
! If the new time step is too small then abort
if (new_time_step < code_dt_min) &
call mp_abort("Time step has become too small in advance_nonlinear_term.", .true.)
! If the error is too large we need to retry with a smaller step, so don't
! update anything
if (inverse_error < 1.0 .or. (cfl_limited .and. immediate_reset)) then
if(proc0.and.debug) print*,'Time step with size',time_step, &
'failed at internal_time',internal_time,'and counter',counter, &
'retrying with step',new_time_step
nfailed_steps = nfailed_steps + 1
else
stage_coeffs = solution_coeffs * half_time
!Update the solution. Note we could probably merge the two
!OpenMP loops into a single OpenMP parallel region and then
!just add OMP DO to each loop. This would save a bit of overhead.
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(iglo) &
!$OMP SHARED(g_lo, g_cur, solution_coeffs, stage_coeffs, stages) &
!$OMP SCHEDULE(static)
do iglo = g_lo%llim_proc, g_lo%ulim_proc
g_cur(:, :, iglo) = g_cur(:, :, iglo) + stage_coeffs(1) * stages(:, :, iglo, 1)
end do
!$OMP END PARALLEL DO
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(istage, iglo) &
!$OMP SHARED(g_lo, g_cur, solution_coeffs, nstages, half_time, &
!$OMP stages, stage_coeffs) &
!$OMP COLLAPSE(2) &
!$OMP SCHEDULE(static)
do istage = 2, nstages
do iglo = g_lo%llim_proc, g_lo%ulim_proc
g_cur(:, :, iglo) = g_cur(:, :, iglo) + stage_coeffs(istage) * stages(:, :, iglo, istage)
end do
end do
!$OMP END PARALLEL DO
internal_time = internal_time + time_step
nsuccessful_steps = nsuccessful_steps + 1
end if
!Now update the time step, capped to dt
time_step = min(new_time_step, dt)
! Make sure we don't overshoot the target time by limiting time step.
if(internal_time < dt) then
time_step = min(time_step, dt - internal_time)
counter = counter + 1
end if
end do
if(proc0.and.debug) print*,'Completed rk step in',counter,'steps to',internal_time
! We may be able to avoid this copy and field solve on exit if we restructure the main
! loop slightly so g_state represents the new solution. This may require use of g_work
! or similar to hold intermediate values.
call copy(g_cur, g_state)
time_step_last = time_step
end subroutine advance_nonlinear_term_rk_implementation