!> FIXME : Add documentation module nonlinear_terms ! ! missing factors of B_a/B(theta) in A_perp terms?? ! use abstract_config, only: abstract_config_type, CONFIG_MAX_NAME_LEN use mp, only: mp_request_null implicit none private public :: init_nonlinear_terms, finish_nonlinear_terms public :: read_parameters, wnml_nonlinear_terms, check_nonlinear_terms public :: add_explicit_terms, split_nonlinear, time_add_explicit_terms_field public :: finish_init, reset_init, nonlin, accelerated public :: cfl, time_add_explicit_terms, time_add_explicit_terms_mpi public :: calculate_current_nl_source_and_cfl_limit public :: cfl_req_hand, nb_check_time_step_too_large public :: nonlinear_terms_testing public :: nonlinear_terms_config_type public :: set_nonlinear_terms_config public :: get_nonlinear_terms_config !> Public type allowing tests to access otherwise-private routines type :: nonlinear_terms_testing contains procedure, nopass :: add_nl end type nonlinear_terms_testing integer :: istep_last = 0 ! knobs integer :: nonlinear_mode_switch integer, parameter :: nonlinear_mode_none = 1, nonlinear_mode_on = 2 #ifndef SHMEM real, dimension (:,:), allocatable :: ba, gb, bracket ! yxf_lo%ny, yxf_lo%llim_proc:yxf_lo%ulim_alloc real, dimension (:,:,:), allocatable :: aba, agb, abracket ! 2*ntgrid+1, 2, accelx_lo%llim_proc:accelx_lo%ulim_alloc #else real, save, dimension (:,:), pointer :: ba => null(), gb => null(), bracket => null() ! yxf_lo%ny, yxf_lo%llim_proc:yxf_lo%ulim_alloc real, save, dimension (:,:,:), pointer, contiguous :: aba => null(), & agb => null(), abracket => null() ! 2*ntgrid+1, 2, accelx_lo%llim_proc:accelx_lo%ulim_alloc #endif ! CFL coefficients real :: cfl, cflx, cfly !> Data on maximum velocity of the nonlinear term, the reciprocal of !> the maximum timestep that satisfies the CFL condition. This !> currently has to be module level due to potential non-blocking !> reduction. Stores max_vel_x, max_vel_y and max_vel_error !> components to allow a max reduction over each element. real, dimension(3) :: max_vel_components integer :: cfl_req_hand = mp_request_null logical :: use_cfl_limit, use_2d_cfl !> Variables related to the order based error estimate -- see config documentation real :: error_target integer :: istep_error_start logical :: use_order_based_error logical :: split_nonlinear logical :: include_apar, include_bpar, include_phi real :: time_add_explicit_terms(2) = 0., time_add_explicit_terms_mpi = 0. real :: time_add_explicit_terms_field(2) = 0. logical :: nonlin = .false. logical :: initialized = .false. logical :: initializing = .true. logical :: alloc = .true. logical :: zip = .false. logical :: nl_forbid_force_zero = .true. logical :: accelerated = .false. !> The "large" cfl time step limit to use when we don't !> have a valid cfl limit from the NL term. real, parameter :: dt_cfl_default_large = 1.e8 !> Used to represent the input configuration of nonlinear_terms type, extends(abstract_config_type) :: nonlinear_terms_config_type ! namelist : nonlinear_terms_knobs ! indexed : false !> Scales the estimate CFL limit used to determine when the !> timestep should be changed. The maximum allowed timestep !> satisfies `delt < cfl * min(Delta_perp/v_perp)` where `v_perp !> * delt` is the maximum distance travelled in a single !> timestep. real :: cfl = 0.95 !> Set the error threshold used to determine when the timestep should change if !> [[nonlinear_terms_knobs:use_order_based_error]] is true. real :: error_target = 0.1 !> Flag for testing. If false do not include apar contribution to nonlinear term. logical :: include_apar = .true. !> Flag for testing. If false do not include bpar contribution to nonlinear term. logical :: include_bpar = .true. !> Flag for testing. If false do not include phi contribution to nonlinear term. logical :: include_phi = .true. !> Set the first timestep for which the order based error checks are made if !> [[nonlinear_terms_knobs:use_order_based_error]] is true. integer :: istep_error_start = 30 !> If `true` (default) then forces the nonlinear source term to !> zero in the forbidden region. logical :: nl_forbid_force_zero = .true. !> Determines if the nonlinear terms should be calculated. Must !> be one of: !> !> - 'none' - Do not include nonlinear terms, i.e. run a linear calculation. !> - 'default' - The same as 'none' !> - 'off' - The same as 'none' !> - 'on' - Include nonlinear terms. !> !> Could consider changing this to a logical. character(len = 20) :: nonlinear_mode = 'default' !> Do we evolve the nonlinear term separately from the linear terms (true) or !> include the nonlinear term as a source in the standard algorithm (false). logical :: split_nonlinear = .false. !> If true then sum x and y cfl limiting velocities instead of taking the maximum. logical :: use_2d_cfl = .true. !> If true then use the cfl limit to set the maximum timestep allowed. logical :: use_cfl_limit = .true. !> If true then use an error estimate from comparing the nonlinear source !> calculated using 2nd and 3rd order Adams-Bashforth schemes to control !> the timestep in use. This does not disable the CFL estimate. logical :: use_order_based_error = .false. !> Not currently used, should consider removing. Original !> documentation was "Experts only (for secondary/tertiary !> calculations)." which suggests a close relation to the !> `eqzip` option of [[knobs]]. logical :: zip = .false. contains procedure, public :: read => read_nonlinear_terms_config procedure, public :: write => write_nonlinear_terms_config procedure, public :: reset => reset_nonlinear_terms_config procedure, public :: broadcast => broadcast_nonlinear_terms_config procedure, public, nopass :: get_default_name => get_default_name_nonlinear_terms_config procedure, public, nopass :: get_default_requires_index => get_default_requires_index_nonlinear_terms_config end type nonlinear_terms_config_type type(nonlinear_terms_config_type) :: nonlinear_terms_config contains !> FIXME : Add documentation subroutine check_nonlinear_terms(report_unit,delt_adj) use gs2_time, only: code_dt_min, code_dt_max use kt_grids, only: box use run_parameters, only: nstep, wstar_units use theta_grid, only: nperiod implicit none integer, intent(in) :: report_unit real, intent(in) :: delt_adj if (nonlin) then write (report_unit, *) write (report_unit, fmt="('This is a nonlinear simulation.')") write (report_unit, *) if (wstar_units) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('Nonlinear runs require wstar_units = .false. in the knobs namelist.')") write (report_unit, fmt="('THIS IS AN ERROR.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if ( .not. box) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('Nonlinear runs must be carried out in a box.')") write (report_unit, fmt="('Set grid_option to box in the kt_grids_knobs namelist.')") write (report_unit, fmt="('THIS IS AN ERROR.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (split_nonlinear) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('The nonlinear term will be evolved separately from')") write (report_unit, fmt="('the linear terms in a crude IMEX style.')") write (report_unit, fmt="('This is an experimental feature.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (.not. include_apar) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('The A|| potential contributions to the nonlinear term are disabled.')") write (report_unit, fmt="('This is probably an error.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (.not. include_bpar) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('The B|| potential contributions to the nonlinear term are disabled.')") write (report_unit, fmt="('This is probably an error.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (.not. include_phi) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('The electrostatic potential contributions to the nonlinear term are disabled.')") write (report_unit, fmt="('This is probably an error.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (nperiod > 1) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('Nonlinear runs usually have nperiod = 1.')") write (report_unit, fmt="('THIS MAY BE AN ERROR.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (use_cfl_limit) then write (report_unit, *) write (report_unit, fmt="('The timestep will be adjusted to satisfy the CFL limit.')") write (report_unit, fmt="('The maximum delt < ',f10.4,' * min(Delta_perp/v_perp). (cfl)')") cfl write (report_unit, *) else write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('Nonlinear usually have use_cfl_limit = T')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if if (use_order_based_error) then write (report_unit, *) write (report_unit, fmt="('The timestep will be adjusted to keep the estimated error below',e11.4)") error_target write (report_unit, fmt="('beginning on step',I0)") istep_error_start write (report_unit, *) else if (.not. use_cfl_limit) then write (report_unit, *) write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, fmt="('Both CFL and error methods for controlling explicit timestep are disabled.')") write (report_unit, fmt="('THIS IS PROBABLY AN ERROR.')") write (report_unit, fmt="('################# WARNING #######################')") write (report_unit, *) end if end if write (report_unit, fmt="('The minimum delt ( code_dt_min ) = ',e11.4)") code_dt_min write (report_unit, fmt="('The maximum delt (code_dt_max) = ',e11.4)") code_dt_max write (report_unit, fmt="('When the time step needs to be changed, it is adjusted by a factor of ',f10.4)") delt_adj write (report_unit, fmt="('The number of time steps nstep = ',i7)") nstep endif end subroutine check_nonlinear_terms !> FIXME : Add documentation subroutine wnml_nonlinear_terms(unit) implicit none integer, intent(in) :: unit if (nonlin) then write (unit, *) write (unit, fmt="(' &',a)") "nonlinear_terms_knobs" write (unit, fmt="(' nonlinear_mode = ',a)") '"on"' write (unit, fmt="(' cfl = ',e17.10)") cfl write (unit, fmt="(' nl_forbid_force_zero = ',L1)") nl_forbid_force_zero if (zip) write (unit, fmt="(' zip = ',L1)") zip write (unit, fmt="(' /')") endif end subroutine wnml_nonlinear_terms !> FIXME : Add documentation subroutine init_nonlinear_terms(nonlinear_terms_config_in) use theta_grid, only: init_theta_grid, ntgrid use kt_grids, only: init_kt_grids, naky, ntheta0, nx, ny, akx, aky use le_grids, only: init_le_grids, nlambda, negrid use species, only: init_species, nspec use gs2_layouts, only: init_dist_fn_layouts, yxf_lo, accelx_lo use gs2_layouts, only: init_gs2_layouts use gs2_transforms, only: init_transforms use mp, only: nproc, iproc use array_utils, only: zero_array #ifdef SHMEM use shm_mpi3, only : shm_alloc #endif implicit none type(nonlinear_terms_config_type), intent(in), optional :: nonlinear_terms_config_in logical, parameter :: debug=.false. if (initialized) return initialized = .true. if (debug) write(6,*) "init_nonlinear_terms: init_gs2_layouts" call init_gs2_layouts if (debug) write(6,*) "init_nonlinear_terms: init_theta_grid" call init_theta_grid if (debug) write(6,*) "init_nonlinear_terms: init_kt_grids" call init_kt_grids if (debug) write(6,*) "init_nonlinear_terms: init_le_grids" call init_le_grids if (debug) write(6,*) "init_nonlinear_terms: init_species" call init_species if (debug) write(6,*) "init_nonlinear_terms: init_dist_fn_layouts" call init_dist_fn_layouts (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nproc, iproc) call read_parameters(nonlinear_terms_config_in) ! Initialise maximum velocity values max_vel_components = 0 if (nonlin) then if (debug) write(6,*) "init_nonlinear_terms: init_transforms" call init_transforms (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny, accelerated) if (debug) write(6,*) "init_nonlinear_terms: allocations" if (alloc) then if (accelerated) then #ifndef SHMEM allocate ( aba(2*ntgrid+1, 2, accelx_lo%llim_proc:accelx_lo%ulim_alloc)) allocate ( agb(2*ntgrid+1, 2, accelx_lo%llim_proc:accelx_lo%ulim_alloc)) allocate (abracket(2*ntgrid+1, 2, accelx_lo%llim_proc:accelx_lo%ulim_alloc)) #else call shm_alloc(aba, (/ 1, 2*ntgrid+1, 1, 2, accelx_lo%llim_proc, accelx_lo%ulim_alloc /)) call shm_alloc(agb, (/ 1, 2*ntgrid+1, 1, 2, accelx_lo%llim_proc, accelx_lo%ulim_alloc /)) call shm_alloc(abracket, (/ 1, 2*ntgrid+1, 1, 2, accelx_lo%llim_proc, accelx_lo%ulim_alloc /)) #endif call zero_array(aba) ; call zero_array(agb) ; call zero_array(abracket) else #ifndef SHMEM allocate ( ba(yxf_lo%ny,yxf_lo%llim_proc:yxf_lo%ulim_alloc)) allocate ( gb(yxf_lo%ny,yxf_lo%llim_proc:yxf_lo%ulim_alloc)) allocate (bracket(yxf_lo%ny,yxf_lo%llim_proc:yxf_lo%ulim_alloc)) #else call shm_alloc(ba, (/ 1,yxf_lo%ny,yxf_lo%llim_proc,yxf_lo%ulim_alloc /)) call shm_alloc(gb, (/ 1,yxf_lo%ny,yxf_lo%llim_proc,yxf_lo%ulim_alloc /)) call shm_alloc(bracket, (/ 1,yxf_lo%ny,yxf_lo%llim_proc,yxf_lo%ulim_alloc /)) #endif call zero_array(ba) ; call zero_array(gb) ; call zero_array(bracket) end if alloc = .false. end if cfly = maxval(aky) * 0.5 / cfl cflx = maxval(akx) * 0.5 / cfl end if end subroutine init_nonlinear_terms !> FIXME : Add documentation subroutine read_parameters(nonlinear_terms_config_in) use file_utils, only: error_unit use text_options, only: text_option, get_option_value implicit none type(nonlinear_terms_config_type), intent(in), optional :: nonlinear_terms_config_in type (text_option), dimension (4), parameter :: nonlinearopts = & [ text_option('default', nonlinear_mode_none), & text_option('none', nonlinear_mode_none), & text_option('off', nonlinear_mode_none), & text_option('on', nonlinear_mode_on) ] character(len = 20) :: nonlinear_mode integer :: ierr if (present(nonlinear_terms_config_in)) nonlinear_terms_config = nonlinear_terms_config_in call nonlinear_terms_config%init(name = 'nonlinear_terms_knobs', requires_index = .false.) ! Copy out internal values into module level parameters associate(self => nonlinear_terms_config) #include "nonlinear_terms_copy_out_auto_gen.inc" end associate ierr = error_unit() call get_option_value & (nonlinear_mode, nonlinearopts, nonlinear_mode_switch, & ierr, "nonlinear_mode in nonlinear_terms_knobs",.true.) nonlin = nonlinear_mode_switch == nonlinear_mode_on end subroutine read_parameters !> FIXME : Add documentation subroutine add_explicit_terms (g1, g2, g3, phi, apar, bpar, istep, bd) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo use gs2_time, only: save_dt_cfl use job_manage, only: time_message use mp, only: get_mp_times implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1, g2, g3 complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar integer, intent (in) :: istep real, intent (in) :: bd real :: dt_cfl logical, parameter :: nl = .true. real :: mp_total_after, mp_total call time_message(.false., time_add_explicit_terms, 'Explicit terms') call get_mp_times(total_time = mp_total) if (nonlin) then if (istep /= 0) then call add_explicit (g1, g2, g3, phi, apar, bpar, istep, bd, nl) end if else dt_cfl = dt_cfl_default_large call save_dt_cfl (dt_cfl) end if call time_message(.false., time_add_explicit_terms, 'Explicit terms') call get_mp_times(total_time = mp_total_after) time_add_explicit_terms_mpi = time_add_explicit_terms_mpi + (mp_total_after - mp_total) end subroutine add_explicit_terms !> FIXME : Add documentation subroutine add_explicit (g1, g2, g3, phi, apar, bpar, istep, bd, nl) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx, il_idx, is_idx use dist_fn_arrays, only: g use gs2_time, only: save_dt_cfl, code_dt, check_time_step_too_large use run_parameters, only: reset, immediate_reset use unit_tests, only: debug_message use mp, only: nb_max_allreduce, max_allreduce use array_utils, only: copy, zero_array implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1, g2, g3 complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar integer, intent (in) :: istep real, intent (in) :: bd logical, intent (in), optional :: nl integer, parameter :: verb = 4 integer :: iglo, ik real :: dt_cfl real :: error real :: target_dt call debug_message(verb, 'nonlinear_terms::add_explicit starting') if (initializing) then if (present(nl)) then dt_cfl = dt_cfl_default_large call save_dt_cfl (dt_cfl) ! Ensure the module level max_vel is consistent with ! the selected cfl limiting timestep. max_vel_components = 1.0/dt_cfl end if return endif ! ! @todo Currently not self-starting. Need to fix this. ! call debug_message(verb, 'nonlinear_terms::add_explicit copying old terms') ! The following if statement ensures that the nonlinear term is calculated only once ! per timestep. It guards against calculating the nonlinear term in two cases: ! 1. the second call to `timeadv` in a usual timestep ! 2. both calls to `timeadv` after the CFL condition check has triggered a reset ! of this timestep (when `immediate_reset = .true.`). As the nonlinear term is ! independent of `delt`, it does not need to be calculated again. ! ! N.B. When `immediate_reset = .false.`, the timestep is completed even though the ! CFL condition is broken. `delt` and the response matrix are changed at the end of ! the timestep; the following timestep is "normal", and the nonlinear term is ! calculated. if (istep /= istep_last) then istep_last = istep ! Shuffle time history along -- this is a place pointers might help ! avoid some copying. ! Should we only need to do this if we have a nonlinear simulation? ! It's possible we might have other explicit sources, in which case we ! probably always need to do this shuffling, however in that case this ! subroutine probably best belongs in a different module. call copy(g2, g3) call copy(g1, g2) call debug_message(verb, 'nonlinear_terms::add_explicit copied old terms') ! if running nonlinearly, then compute the nonlinear term at grid points ! and store it in g1 if (present(nl)) then call debug_message(verb, 'nonlinear_terms::add_explicit calling add_nl') ! If we're not using various limits then we'd like to zero ! out the maximum velocity however, we later take 1/max_vel ! so let's just set it small. max_vel_components = epsilon(0.0) call add_nl (g, g1, phi, apar, bpar) ! takes g1 at grid points and returns 2*g1 at cell centers call nl_center (g1, bd) ! Currently the module level max_vel_components contains the ! maximum x and y velocities found from the nonlinear term ! on this processor. This can be used to find the cfl ! limting time step. ! Reset the x and y components of max_vel_components if we ! don't want to consider the cfl limit if (.not. use_cfl_limit) then max_vel_components(1:2) = epsilon(0.0) end if ! Calculate the effective maximum velocity based on the error ! based time step limit (really we estimate the maximum time step ! to keep our errors below our target and then convert to an ! effective velocity). if (use_order_based_error .and. (istep > istep_error_start)) then ! Get the current order based error estimate error = estimate_error(g1, g2, g3) ! Based on the current error estimate and the expected error scaling ! work out what time step we expect to give us the target error. ! Note that we don't scale by cfl here as the user has direct control ! over error_target already. Also note that by taking the square root ! we are assuming that the error looks like dt^2. We should probably ! look at what order schemes we're actually comparing to determine ! the appropriate dependency. target_dt =code_dt*sqrt(error_target/(error)) ! Now we have an error limited time step lets convert this ! to an effective maximum velocity max_vel_components(3) = 1.0/target_dt end if ! Now we can reduce our local limiting velocities to get ! a global value for the maximum velocity. We only need to ! reduce if at least one time limit check is active. if (use_cfl_limit .or. use_order_based_error) then ! Estimate the global cfl limit based on max_vel calculated ! in `add_nl`.If immediate_reset is true, check the max ! allreduce immediately. If not, overlap comms and work, ! and check for reset at the end of the timestep. if(immediate_reset)then call max_allreduce(max_vel_components) ! This call can update the value of reset. call set_cfl_time_step_and_check_if_too_large else ! Do a nonblocking reduce and check later in ! [[gs2_reinit:check_time_step]]. call nb_max_allreduce(max_vel_components, cfl_req_hand) end if else ! If we don't have any checks active then we better make sure that the ! limiting time step is set. We will have set max_vel earlier to ! epsilon(0.0) so we can just use the usual routine to set this, but ! can skip communications. Really we probably just need to do this once ! but for now just put this here to be on the safe side. The cost should ! be low. call set_cfl_time_step_and_check_if_too_large end if if(reset) then return !Return if resetting endif else call zero_array(g1) end if end if if (zip) then do iglo = g_lo%llim_proc, g_lo%ulim_proc ik = ik_idx(g_lo,iglo) if (ik /= 1) then g (:,1,iglo) = 0. g (:,2,iglo) = 0. g1(:,1,iglo) = 0. g1(:,2,iglo) = 0. end if end do end if istep_last = istep end subroutine add_explicit !> Get an estimate of the error on the NL source term by comparing !> 2nd and 3rd order calculations of the source term. !> !> Here we're calculating g at t+code_dt using g(t) = g1, g(t-dt1) = g2 !> and g(t-dt2) = g3 using 2nd and 3rd order methods and comparing !> the difference to estimate the error. real function estimate_error(g1, g2, g3) result(error) use gs2_time, only: get_adams_bashforth_coefficients, time_history_available use mp, only: max_allreduce use array_utils, only: gs2_max_abs implicit none complex, dimension(:, :, :), intent(in) :: g1, g2, g3 complex, dimension(:, :, :), allocatable :: source_order_3, source_order_2 real, dimension(2) :: intermediate_error real, dimension(:), allocatable :: coeff_3rd, coeff_2nd allocate(coeff_3rd(min(time_history_available(), 3))) allocate(coeff_2nd(min(time_history_available(), 2))) ! Check that we actually have 3rd and 2nd order coefficients. ! This could fail on the first two time steps of a simulation, for ! example, where we don't have enough time history to allow a ! 2nd/3rd order calculation. For now just return an error of ! error_target times cfl squared. This is chosen such the target ! timestep remains the current timestep. if ( (size(coeff_3rd) /= 3) .or. (size(coeff_2nd) /= 2) ) then error = error_target *cfl*cfl return end if ! Calculate the coefficients required for variable time step schemes coeff_3rd = get_adams_bashforth_coefficients(maximum_order = 3) coeff_2nd = get_adams_bashforth_coefficients(maximum_order = 2) ! Calculate the source terms using 3rd and 2nd order methods allocate(source_order_3, mold = g1) allocate(source_order_2, mold = g1) source_order_3 = coeff_3rd(1) * g1 + coeff_3rd(2) * g2 + coeff_3rd(3) * g3 source_order_2 = coeff_2nd(1) * g1 + coeff_2nd(2) * g2 ! Here we find the maximum absolute error intermediate_error(1) = gs2_max_abs(source_order_3 - source_order_2) ! Now we find the maximum of source_order_2 to scale the absolute ! error with. Note we use this rather than source_order_3 as the ! higher order method has a smaller region of stability so is ! perhaps more likely to blow up. intermediate_error(2) = gs2_max_abs(source_order_2) ! Reduce over all processors call max_allreduce(intermediate_error) ! Now we form a crude relative error by dividing by the maximum ! source value. We do this rather than forming the true relative ! error (i.e. dividing the entire absolute difference by the source) ! to avoid issues with dividing by small numbers. This will tend to ! underestimate the real relative error. error = intermediate_error(1) / intermediate_error(2) end function estimate_error !> Takes input array evaluated at theta grid points !! and overwrites it with array evaluated at cell centers !! note that there is an extra factor of 2 in output array subroutine nl_center (gtmp, bd) ! !CMR, 13/10/2014 ! Fixing some (cosmetic) issues from prior to R3021. ! (1) forbid(-ntgrid:ntgrid) refers to grid points at cell boundaries ! gtmp(-ntgrid:ntgrid-1) refers to cell centers ! => applying forbid to output gtmp did not zero the correct things!!! ! (2) totally trapped particles are special, so return source without upwinding is fine ! BUT source for other trapped particles should NOT enter forbidden region. ! if ig or ig+1 forbidden (i.e. ig+1 or ig is bounce point) now return gtmp(ig,:,iglo)=0 ! use gs2_layouts, only: g_lo, il_idx use theta_grid, only: ntgrid use le_grids, only: forbid, grid_has_trapped_particles, jend, is_ttp use mp, only: mp_abort implicit none real, intent (in) :: bd complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: gtmp integer :: iglo, il, ig logical :: trapped trapped = grid_has_trapped_particles() ! factor of one-half appears elsewhere !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, il, ig) & !$OMP SHARED(g_lo, nl_forbid_force_zero, forbid, gtmp, ntgrid, is_ttp, jend, trapped, bd) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc il = il_idx(g_lo, iglo) ! !CMR, 7/10/2014: ! Incoming gtmp SHOULD vanish in forbidden region, ! New logical in nonlinear_terms_knobs namelist: nl_forbid_force_zero ! nl_forbid_force_zero =.t. : force zeroing (default) ! nl_forbid_force_zero =.f. : NO forced zeroing ! ie assume forbidden gtmp is zero on entry ! if ( nl_forbid_force_zero ) then ! force spurious gtmp outside trapped boundary to be zero where (forbid(:,il)) gtmp(:,1,iglo) = 0.0 gtmp(:,2,iglo) = 0.0 end where endif do ig = -ntgrid, ntgrid-1 ! !CMR, 7/10/2014: ! loop sets gtmp to value at upwinded cell center RIGHT of theta(ig) ! except for ttp where upwinding makes no sense! ! ! Note this cycle doesn't skip when ig is in the forbidden region ! even if the pitch angle is greater than the totally trapped one ! at this location. This ensures that whilst we leave the (allowed) ! totally trapped source alone we still zero the NL source for forbidden ! points. Previously we cycled simply if the pitch angle index was larger ! than the totally trapped one. When there are multiple totally trapped ! pitch angles this may not be equivalent. if (is_ttp(ig, il)) cycle ! The below might be better off using forbid instead of checking jend if ( trapped .and. ( il > jend(ig) .or. il > jend(ig+1)) ) then ! !CMR, 7/10/2014: ! if either ig or ig+1 is forbidden, no source possible in a cell RIGHT of theta(ig) ! => gtmp(ig,1:2,iglo)=0 ! gtmp(ig,1:2,iglo) = 0.0 else ! !CMR, 7/10/2014: ! otherwise ig and ig+1 BOTH allowed, and upwinding in cell RIGHT of theta(ig) is fine ! gtmp(ig,1,iglo) = (1.+bd)*gtmp(ig+1,1,iglo) + (1.-bd)*gtmp(ig,1,iglo) gtmp(ig,2,iglo) = (1.-bd)*gtmp(ig+1,2,iglo) + (1.+bd)*gtmp(ig,2,iglo) endif end do end do !$OMP END PARALLEL DO end subroutine nl_center !> Calculates the current nonlinear source term by calling add_nl !> and the associated cfl limit. subroutine calculate_current_nl_source_and_cfl_limit(g_in, g1, phi, apar, bpar, & max_vel_local, need_to_adjust, calculate_cfl_limit) use optionals, only: get_option_with_default implicit none complex, dimension (:,:,:), intent (in) :: g_in complex, dimension (:,:,:), intent (out) :: g1 complex, dimension (:,:,:), intent (in) :: phi, apar, bpar real, intent(out) :: max_vel_local logical, intent(in) :: need_to_adjust logical, intent(in), optional :: calculate_cfl_limit use_cfl_limit = get_option_with_default(calculate_cfl_limit, .true.) call add_nl(g_in, g1, phi, apar, bpar, need_to_adjust) if (use_cfl_limit) then ! This code may need changing if the handling of the cfl limit changes max_vel_local = get_max_vel(max_vel_components) else max_vel_local = 1.0e-8 end if end subroutine calculate_current_nl_source_and_cfl_limit !> Calculate the nonlinear term and part of the corresponding CFL condition subroutine add_nl (g_in, g1, phi, apar, bpar, adjust) use theta_grid, only: ntgrid, kxfac use gs2_layouts, only: g_lo, ik_idx, it_idx use dist_fn_arrays, only: g_adjust, from_g_gs2 use gs2_transforms, only: transform2, inverse2 use kt_grids, only: aky, akx use gs2_time, only: save_dt_cfl, check_time_step_too_large use constants, only: zi use unit_tests, only: debug_message use optionals, only: get_option_with_default use array_utils, only: copy, gs2_max_abs implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in) :: g_in complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (out) :: g1 !> Local array used to store the adjusted input dfn if needed. Saves !> calling g_adjust twice. complex, dimension (-ntgrid:ntgrid, 2, g_lo%llim_proc:g_lo%ulim_alloc) :: g2 complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar logical, intent(in), optional :: adjust logical :: should_adjust integer, parameter :: verb = 4 integer :: iglo, ik, it real :: max_vel_x, max_vel_y call debug_message(verb, 'nonlinear_terms::add_nl starting') should_adjust = get_option_with_default(adjust, .true.) !Form g1=i*kx*chi call load_kx_phi(g1, phi) call load_kx_bpar(g1, bpar) call load_kx_apar(g1, apar) call debug_message(verb, 'nonlinear_terms::add_nl calling transform2') !Transform to real space if (accelerated) then call transform2 (g1, aba) max_vel_y = gs2_max_abs(aba) else call transform2 (g1, ba) max_vel_y = gs2_max_abs(ba) end if max_vel_y = max_vel_y * cfly call debug_message(verb, 'nonlinear_terms::add_nl calculated dphi/dx') !Form g1=i*ky*g_wesson call copy(g_in, g1) if (should_adjust) call g_adjust(g1, phi, bpar, direction = from_g_gs2) call copy(g1, g2) !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, ik) & !$OMP SHARED(g_lo, g1, aky) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc ik = ik_idx(g_lo,iglo) g1(:,:,iglo)=g1(:,:,iglo)*zi*aky(ik) enddo !$OMP END PARALLEL DO !Transform to real space if (accelerated) then call transform2 (g1, agb) else call transform2 (g1, gb) end if call debug_message(verb, 'nonlinear_terms::add_nl calculated dg/dy') call calculate_bracket(.true.) call debug_message(verb, 'nonlinear_terms::add_nl calculated dphi/dx.dg/dy') !Form g1=i*ky*chi call load_ky_phi(g1, phi) call load_ky_bpar(g1, bpar) call load_ky_apar(g1, apar) call debug_message(verb, 'nonlinear_terms::add_nl calculated dphi/dy') !Transform to real space if (accelerated) then call transform2 (g1, aba) max_vel_x = gs2_max_abs(aba) else call transform2 (g1, ba) max_vel_x = gs2_max_abs(ba) end if max_vel_x = max_vel_x * cflx !Form g1=i*kx*g_wesson noting that g2 holds our starting g_wesson !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it) & !$OMP SHARED(g_lo, g2, akx) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) g2(:,:,iglo) = g2(:,:,iglo) * zi * akx(it) enddo !$OMP END PARALLEL DO !Transform to real space if (accelerated) then call transform2 (g2, agb) else call transform2 (g2, gb) end if call debug_message(verb, 'nonlinear_terms::add_nl calculated dg/dx') call calculate_bracket(.false.) call debug_message(verb, & 'nonlinear_terms::add_nl calculated dphi/dx.dg/dy.dphi/dy.dg/dx') !Transform NL source back to spectral space if (accelerated) then call inverse2 (abracket, g1) else call inverse2 (bracket, g1) end if !Store local max vel components for future max reduce max_vel_components(1) = max_vel_x max_vel_components(2) = max_vel_y ! Scale by kxfac !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo) & !$OMP SHARED(g_lo, g1, kxfac) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc g1(:, :, iglo) = g1(:, :, iglo) * kxfac end do !$OMP END PARALLEL DO call debug_message(verb, & 'nonlinear_terms::add_nl calculated nonlinear term') end subroutine add_nl !> FIXME : Add documentation subroutine load_kx_phi(g1, phi) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx use dist_fn_arrays, only: aj0 use run_parameters, only: has_phi, fphi use kt_grids, only: akx use constants, only: zi use array_utils, only: zero_array implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: phi complex, dimension(-ntgrid:ntgrid) :: fac integer :: iglo, it, ik if (has_phi .and. include_phi) then !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, fac) & !$OMP SHARED(g_lo, g1, akx, aj0, phi, fphi) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) fac = zi*akx(it)*aj0(:,iglo)*phi(:,it,ik)*fphi g1(:,1,iglo) = fac g1(:,2,iglo) = fac end do !$OMP END PARALLEL DO else call zero_array(g1) endif end subroutine load_kx_phi !> FIXME : Add documentation subroutine load_ky_phi(g1, phi) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx use dist_fn_arrays, only: aj0 use run_parameters, only: has_phi, fphi use kt_grids, only: aky use constants, only: zi use array_utils, only: zero_array implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: phi complex, dimension(-ntgrid:ntgrid) :: fac integer :: iglo, it, ik if (has_phi .and. include_phi) then !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, fac) & !$OMP SHARED(g_lo, g1, aky, aj0, phi, fphi) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) fac = zi*aky(ik)*aj0(:,iglo)*phi(:,it,ik)*fphi g1(:,1,iglo) = fac g1(:,2,iglo) = fac end do !$OMP END PARALLEL DO else call zero_array(g1) endif end subroutine load_ky_phi ! should I use vpa or vpac in next two routines?? !> FIXME : Add documentation subroutine load_kx_apar(g1, apar) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx, is_idx use dist_fn_arrays, only: vpa, aj0 use species, only: spec use run_parameters, only: has_apar, fapar use kt_grids, only: akx use constants, only: zi implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: apar complex, dimension(-ntgrid:ntgrid) :: fac integer iglo, it, ik, is if(.not. (include_apar .and. has_apar) ) return !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, is, fac) & !$OMP SHARED(g_lo, g1, vpa, akx, aj0, spec, apar, fapar) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) is = is_idx(g_lo,iglo) fac = zi*akx(it)*aj0(:,iglo)*spec(is)%stm*apar(:,it,ik)*fapar g1(:,1,iglo) = g1(:,1,iglo) - fac*vpa(:,1,iglo) g1(:,2,iglo) = g1(:,2,iglo) - fac*vpa(:,2,iglo) end do !$OMP END PARALLEL DO end subroutine load_kx_apar !> FIXME : Add documentation subroutine load_ky_apar(g1, apar) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx, is_idx use dist_fn_arrays, only: vpa, aj0 use species, only: spec use run_parameters, only: has_apar, fapar use kt_grids, only: aky use constants, only: zi implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: apar complex, dimension(-ntgrid:ntgrid) :: fac integer iglo, it, ik, is if(.not. (include_apar .and. has_apar)) return !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, is, fac) & !$OMP SHARED(g_lo, g1, vpa, aky, aj0, spec, apar, fapar) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) is = is_idx(g_lo,iglo) fac = zi*aky(ik)*aj0(:,iglo)*spec(is)%stm*apar(:,it,ik)*fapar g1(:,1,iglo) = g1(:,1,iglo) - fac*vpa(:,1,iglo) g1(:,2,iglo) = g1(:,2,iglo) - fac*vpa(:,2,iglo) end do !$OMP END PARALLEL DO end subroutine load_ky_apar !> FIXME : Add documentation subroutine load_kx_bpar(g1, bpar) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx, is_idx use dist_fn_arrays, only: vperp2, aj1 use species, only: spec use run_parameters, only: has_bpar, fbpar use kt_grids, only: akx use constants, only: zi implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: bpar complex, dimension(-ntgrid:ntgrid) :: fac integer iglo, it, ik, is if (.not. (include_bpar .and. has_bpar)) return ! Is this factor of two from the old normalization? !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, is, fac) & !$OMP SHARED(g_lo, g1, akx, aj1, vperp2, spec, bpar, fbpar) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) is = is_idx(g_lo,iglo) fac = zi*akx(it)*aj1(:,iglo) & *2.0*vperp2(:,iglo)*spec(is)%tz*bpar(:,it,ik)*fbpar g1(:,1,iglo) = g1(:,1,iglo) + fac g1(:,2,iglo) = g1(:,2,iglo) + fac end do !$OMP END PARALLEL DO end subroutine load_kx_bpar !> FIXME : Add documentation subroutine load_ky_bpar(g1, bpar) use theta_grid, only: ntgrid use gs2_layouts, only: g_lo, ik_idx, it_idx, is_idx use dist_fn_arrays, only: vperp2, aj1 use species, only: spec use run_parameters, only: has_bpar, fbpar use kt_grids, only: aky use constants, only: zi implicit none complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in out) :: g1 complex, dimension (-ntgrid:,:,:), intent (in) :: bpar complex, dimension(-ntgrid:ntgrid) :: fac integer iglo, it, ik, is if (.not. (include_bpar .and. has_bpar)) return ! Is this factor of two from the old normalization? !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iglo, it, ik, is, fac) & !$OMP SHARED(g_lo, g1, aky, aj1, vperp2, spec, bpar, fbpar) & !$OMP SCHEDULE(static) do iglo = g_lo%llim_proc, g_lo%ulim_proc it = it_idx(g_lo,iglo) ik = ik_idx(g_lo,iglo) is = is_idx(g_lo,iglo) fac = zi*aky(ik)*aj1(:,iglo) & *2.0*vperp2(:,iglo)*spec(is)%tz*bpar(:,it,ik)*fbpar g1(:,1,iglo) = g1(:,1,iglo) + fac g1(:,2,iglo) = g1(:,2,iglo) + fac end do !$OMP END PARALLEL DO end subroutine load_ky_bpar !> Calculate (d Chi /dx).(d g_wesson/dy) and store in bracket if is_first_term = .true. !> else calculate (d Chi /dy).(d g_wesson/dx) and subtract from bracket !> !> @note The final bracket value must be scaled by kxfac to produce !> the correct value to be used. subroutine calculate_bracket(is_first_term) use gs2_layouts, only: accelx_lo, yxf_lo implicit none logical, intent(in) :: is_first_term integer :: iyxf if (is_first_term) then if (accelerated) then !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iyxf) & !$OMP SHARED(accelx_lo, agb, aba, abracket) & !$OMP SCHEDULE(static) do iyxf = accelx_lo%llim_proc, accelx_lo%ulim_proc abracket(:, :, iyxf) = aba(:, :, iyxf)*agb(:, :, iyxf) end do !$OMP END PARALLEL DO else !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iyxf) & !$OMP SHARED(yxf_lo, bracket, ba, gb) & !$OMP SCHEDULE(static) do iyxf = yxf_lo%llim_proc, yxf_lo%ulim_proc bracket(:, iyxf) = ba(:, iyxf)*gb(:, iyxf) end do !$OMP END PARALLEL DO endif else if (accelerated) then !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iyxf) & !$OMP SHARED(accelx_lo, agb, aba, abracket) & !$OMP SCHEDULE(static) do iyxf = accelx_lo%llim_proc, accelx_lo%ulim_proc abracket(:, :, iyxf) = abracket(:, :, iyxf) - aba(:, :, iyxf)*agb(:, :, iyxf) end do !$OMP END PARALLEL DO else !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(iyxf) & !$OMP SHARED(yxf_lo, bracket, ba, gb) & !$OMP SCHEDULE(static) do iyxf = yxf_lo%llim_proc, yxf_lo%ulim_proc bracket(:, iyxf) = bracket(:, iyxf) - ba(:, iyxf)*gb(:, iyxf) end do !$OMP END PARALLEL DO endif endif end subroutine calculate_bracket !> Finish nonblocking calculation of the maximum velocity !> and set the cfl limit. subroutine nb_check_time_step_too_large use run_parameters, only: immediate_reset implicit none if (immediate_reset) return call finish_nonblocking_max_vel_reduction call set_cfl_time_step_and_check_if_too_large end subroutine nb_check_time_step_too_large !> Finish nonblocking calculation of the maximum velocity !> !> This receives the mpi_iallreduce of the maximum velocity. subroutine finish_nonblocking_max_vel_reduction use mp, only: wait, mp_request_null implicit none ! Wait for the nonblocking maximum operation used to find ! the maximum velocity. if (cfl_req_hand == mp_request_null) return call wait(cfl_req_hand) cfl_req_hand = mp_request_null end subroutine finish_nonblocking_max_vel_reduction !> Given the max_vel_components array return the limiting max_vel real pure function get_max_vel(components) result(max_vel) real, dimension(3), intent(in) :: components real :: max_vel_cfl, max_vel_error if (use_2d_cfl) then max_vel_cfl = components(1) + components(2) else max_vel_cfl = maxval(components(1:2)) end if max_vel_error = components(3) max_vel = max(max_vel_cfl, max_vel_error) end function get_max_vel !> Calculates the cfl limit from the module level max_vel value !> saves it using [[gs2_time:save_dt_cfl]] and then sets the reset !> flag based on checking if the current time step is too large. subroutine set_cfl_time_step_and_check_if_too_large use run_parameters, only: reset use gs2_time, only: save_dt_cfl, check_time_step_too_large implicit none real :: dt_cfl, max_vel max_vel = get_max_vel(max_vel_components) ! Estimate the global cfl limit based on max_vel. if (max_vel <= 0.0) then ! We should only end up here in unusual cases (likely ! tests) as we initialise max_vel > 0 so the only way ! we get here is if the estimated NL velocity limit is ! exactly 0. dt_cfl = dt_cfl_default_large else dt_cfl = 1./max_vel end if call save_dt_cfl (dt_cfl) ! Now check to see if we've violated the cfl condition. call check_time_step_too_large(reset) end subroutine set_cfl_time_step_and_check_if_too_large !> FIXME : Add documentation subroutine reset_init implicit none initialized = .false. initializing = .true. end subroutine reset_init !> FIXME : Add documentation subroutine finish_init implicit none initializing = .false. end subroutine finish_init !> FIXME : Add documentation subroutine finish_nonlinear_terms use gs2_transforms, only: finish_transforms #ifdef SHMEM use shm_mpi3, only : shm_free #endif implicit none #ifndef SHMEM if (allocated(aba)) deallocate (aba, agb, abracket) if (allocated(ba)) deallocate (ba, gb, bracket) #else if (associated(aba)) call shm_free(aba) if (associated(agb)) call shm_free(agb) if (associated(abracket)) call shm_free(abracket) if (associated(ba)) then call shm_free(ba) endif if (associated(gb)) then call shm_free(gb) endif if (associated(bracket)) then call shm_free(bracket) endif #endif nonlin = .false. ; alloc = .true. ; zip = .false. ; accelerated = .false. initialized = .false. ; initializing = .true. call finish_transforms call nonlinear_terms_config%reset() end subroutine finish_nonlinear_terms !> Set the module level config type !> Will abort if the module has already been initialised to avoid !> inconsistencies. subroutine set_nonlinear_terms_config(nonlinear_terms_config_in) use mp, only: mp_abort type(nonlinear_terms_config_type), intent(in), optional :: nonlinear_terms_config_in if (initialized) then call mp_abort("Trying to set nonlinear_terms_config when already initialized.", to_screen = .true.) end if if (present(nonlinear_terms_config_in)) then nonlinear_terms_config = nonlinear_terms_config_in end if end subroutine set_nonlinear_terms_config #include "nonlinear_terms_auto_gen.inc" end module nonlinear_terms