!> This module is based on the fields_local module (which is in turn a re-implementation of the
!! fields implicit functionality). fields_gf_local implements the implicit field calculation
!! using a gf_lo based data decomposition to improve performance on large process counts. It
!! utilises a smaller set of processes to reduce communication costs (at the expense of significant
!! load imbalance in the fields calculations). It assumes gf_lo velocity space integration and
!! stores field data in gf_lo format (i.e. ik,it are decomposed over processes, everything else is
!! local).
!!
!! Implemented by Adrian Jackson, EPCC, The University of Edinburgh; 2014-2016. Funded by an
!! EPSRC ARCHER eCSE project.
module fields_gf_local
  use mp, only: comm_type
  implicit none
  private

  !> Module level routines
  public :: init_fields_gf_local, init_allfields_gf_local, finish_fields_gf_local
  public :: advance_gf_local, reset_fields_gf_local, dump_response_to_file_gf_local
  public :: fields_gf_local_functional, read_response_from_file_gf_local
  public :: field_local_allreduce_sub

  !> User set parameters
  public :: dump_response, read_response

  !> Made public for testing
  public :: fieldmat

  !//////////////////////////////
  !// CUSTOM TYPES
  !//////////////////////////////

  !> This is the lowest level of data object and will hold the actual data
  !! involved with the field matrix. One row block holds this processor's
  !! share of the rows of one field equation's sub-matrix within a cell.
  type, private :: rowblock_type
     complex, dimension(:,:), allocatable :: data !< This is the field data, stored as data(ncol,nrow)
     integer :: row_llim, row_ulim !< The row limits this block corresponds to
     integer :: col_llim, col_ulim !< The col limits this block corresponds to
     integer :: nrow, ncol !< The size of the row block
     complex, dimension(:),allocatable :: tmp_sum !< Work space for field update (length nrow)
     !Row and parent properties. Mostly for debug printing.
     integer :: ir_ind !< Property of rowblock: which field equation this block represents
     integer :: it_ind, ik_ind !< Details of parents
     integer :: is_ind, ic_ind !< Details of parents so cell can find itself
   contains
     private
     procedure :: deallocate => rb_deallocate
     procedure :: allocate => rb_allocate
     procedure :: init => rb_init
     procedure :: debug_print => rb_debug_print
     procedure :: mv_mult_fun => rb_mv_mult_fun
     procedure :: mv_mult => rb_mv_mult
     procedure :: irex_to_ir => rb_irex_to_irloc
     procedure :: reset => rb_reset
     procedure :: set_nrow => rb_set_nrow
  end type rowblock_type

  !> This is the next level up of data and represents the cell type.
  !! A cell has a unique it,ik pair and represents all the fields/field eq
  !! on the extended domain for the given it/ik
  type, private :: cell_type
     type(rowblock_type), dimension(:), allocatable :: rb !< These are the row blocks, currently one for each field equation
     integer :: nrb !< How many row blocks, equal to nfield
     !//NOTE: probably don't want these as this data should be stored by the row blocks only
     integer :: row_llim, row_ulim !< The row limits, always 1 to nfield*nextend
     integer, dimension(:), allocatable :: col_llim, col_ulim !< These are the column limits for each fdq
     !//////
     integer :: nrow, ncol !< The number of rows and columns for each field equation.
     integer :: ncol_tot !< The total number of columns (nfdq*ncol)
     logical :: is_local !< Does this cell have any data on this proc?
     logical :: is_empty !< Have we got any data for this cell on this proc?
     logical :: is_all_local !< Is all of this cells data on this proc?
     logical :: ignore_boundary !< Do we ignore the boundary point (column) in this cell?
     complex, dimension(:),allocatable :: tmp_sum !< Work space for field update
     type(comm_type) :: parent_sub !< Sub communicator involving all processors in parent
     !Cell and parent properties. Mostly for debug printing.
     integer :: ic_ind, it_ind !< Cell properties
     integer :: ik_ind, is_ind !< Parent properties
   contains
     private
     procedure :: deallocate => c_deallocate
     procedure :: allocate => c_allocate
     procedure :: init => c_init
     procedure :: debug_print => c_debug_print
     procedure :: mv_mult_rb => c_mv_mult_rb
     procedure :: get_field_update => c_get_field_update
     procedure :: reset => c_reset
     procedure :: set_locality => c_set_locality
     procedure :: has_row => c_has_row
  end type cell_type

  !> This is the next level up of data and represents the supercell type.
  !! A supercell represents a collection of connected cells
  type, private :: supercell_type
     type(cell_type), dimension(:), allocatable :: cells !< These are the cells
     integer :: ncell !< Number of cells in this supercell
     integer :: nextend !< Length of the extended domain
     integer :: nrow, ncol !< The number of rows and columns. Equal to nextend*nfield
     integer :: it_leftmost !< It index of leftmost cell
     integer :: head_iproc_world !< The proc id (in comm world) of the head proc
     integer :: head_iproc !< The proc id (in sc_sub_all) of the head proc
     integer :: head_iproc_pd !< The proc id (in sc_sub_pd) of the head proc
     logical :: is_local !< Does this supercell have any data on this proc?
     logical :: is_empty !< Have we got any data for this supercell on this proc?
     logical :: is_all_local !< Is all of this supercells data on this proc?
     complex, dimension(:),allocatable :: tmp_sum !< Work space for field update
     type(comm_type) :: sc_sub_all !< Sub communicator involving all processors with this supercell
     type(comm_type) :: sc_sub_pd !< Sub communicator for all procs with some data but not all of it
     !NOTE(review): original comment for sc_sub_not_full duplicated sc_sub_all's text;
     !from the name it presumably covers procs that do not hold all of this supercell - confirm.
     type(comm_type) :: sc_sub_not_full !< Sub communicator involving all processors with this supercell
     type(comm_type) :: parent_sub !< Sub communicator involving all processors in parent
     integer, dimension(:),allocatable :: nb_req_hand !< For non-blocking broadcast request handle storage
     logical :: initdone !< Have we finished initialising this block?
     logical, dimension(:), allocatable :: initialised !< Have we initialised each point?
     logical :: is_head=.false. !< Are we the head of this supercell?
     !Cell and parent properties. Mostly for debug printing.
     integer :: ik_ind !< Parent properties
     !> The is_ind value is the index of the supercell in the parent ky_type's
     !> supercell array.
     integer :: is_ind
     !> The is_label is the supercell_label of this supercell as determined
     !> by calculate_supercell_labels from elsewhere.
     integer :: is_label
   contains
     private
     procedure :: deallocate => sc_deallocate
     procedure :: allocate => sc_allocate
     procedure :: init => sc_init
     procedure :: debug_print => sc_debug_print
     procedure :: get_field_update => sc_get_field_update
     procedure :: reduce_tmpsum => sc_reduce_tmpsum
     procedure :: iex_to_dims => sc_iex_to_dims
     procedure :: iex_to_ifl => sc_iex_to_ifl
     procedure :: iex_to_ic => sc_iex_to_ic
     procedure :: iex_to_ig => sc_iex_to_ig
     procedure :: iex_to_it => sc_iex_to_it
     procedure :: has_it => sc_has_it
     procedure :: reset => sc_reset
     procedure :: set_locality => sc_set_locality
     procedure :: get_left_it => sc_get_left_it
     procedure :: store_fq => sc_store_fq
     procedure :: pull_rows_to_arr => sc_pull_rows_to_arr
     procedure :: push_arr_to_rows => sc_push_arr_to_rows
     procedure :: prepare => sc_prepare
     procedure :: invert => sc_invert
     procedure :: invert_local => sc_invert_local
     procedure :: invert_mpi => sc_invert_mpi
     procedure :: dump => sc_dump
     procedure :: make_subcom_1 => sc_make_subcom_1
     procedure :: make_subcom_2 => sc_make_subcom_2
  end type supercell_type

  !> This is the next level up of data and represents the ky type.
  !! A ky block consists of a number of supercells
  type, private :: ky_type
     type(supercell_type), dimension(:), allocatable :: supercells !< These are the supercells
     integer :: nsupercell !< Number of supercells in this ky block
     logical :: is_local !< Does this ky block have any data on this proc?
     logical :: is_empty !< Have we got any data for this ky block on this proc?
     logical :: is_all_local !< Is all of this ky blocks data on this proc?
     logical :: initdone !< Have we finished initialising this block?
     type(comm_type) :: ky_sub_all !< Sub communicator involving all processors with this ky
     type(comm_type) :: parent_sub !< Sub communicator involving all processors in parent
     !Cell and parent properties. Mostly for debug printing.
     integer :: ik_ind
   contains
     private
     procedure :: deallocate => ky_deallocate
     procedure :: allocate => ky_allocate
     procedure :: init => ky_init
     procedure :: debug_print => ky_debug_print
     procedure :: get_field_update => ky_get_field_update
     procedure :: reset => ky_reset
     procedure :: set_locality => ky_set_locality
     procedure :: is_from_it => ky_is_from_it
     procedure :: store_fq => ky_store_fq
     procedure :: prepare => ky_prepare
     procedure :: make_subcom_1 => ky_make_subcom_1
     procedure :: make_subcom_2 => ky_make_subcom_2
  end type ky_type

  !> This is the top level object, consisting of a collection of
  !! ky blocks.
  type, private :: fieldmat_type
     type(ky_type), dimension(:), allocatable :: kyb !< The ky blocks
     integer :: naky !< Number of ky blocks
     integer :: ntheta0 !< Number of theta0 (kx) points
     integer :: npts !< Total number of theta grid points on all extended domains
     integer :: nbound !< Number of ignored boundary points
     logical :: is_local !< Does this fieldmat have any data on this proc?
     logical :: is_empty !< Have we got any data for the fieldmat on this proc?
     logical :: is_all_local !< Is all of the fieldmat data on this proc?
     type(comm_type) :: fm_sub_all !< Sub communicator involving all processors with fieldmat
     type(comm_type) :: fm_sub_headsc_p0 !< Sub communicator involving the supercell heads and proc0
     !> What sort of preparation do we do to the field matrix.
     !! Currently only support:
     !! 0 : Invert the field matrix
     !! In the future may wish to do things like LU decomposition etc.
     !! Could also do things like eigenvalue analysis etc.
     integer :: prepare_type=0
     integer, dimension(:,:), allocatable :: heads !< An array that holds the iproc (from comm world) of the supercell head for a given ik,it point
   contains
     private
     procedure :: deallocate => fm_deallocate
     procedure :: allocate => fm_allocate
     procedure :: debug_print => fm_debug_print
     procedure :: get_field_update => fm_get_field_update
     procedure :: init => fm_init
     procedure :: reset => fm_reset
     procedure :: populate => fm_populate
     procedure :: init_next_field_points => fm_init_next_field_points
     procedure :: prepare => fm_prepare
     procedure :: make_subcom_1 => fm_make_subcom_1
     procedure :: make_subcom_2 => fm_make_subcom_2
     procedure :: calc_sc_heads => fm_calc_sc_heads
     procedure :: gather_fields => fm_gather_fields
     procedure :: gather_init_fields => fm_gather_init_fields
     procedure :: scatter_fields => fm_scatter_fields
     procedure :: unpack_to_field => fm_unpack_to_field
     procedure :: write_debug_data => fm_write_debug_data
     procedure :: set_is_local => fm_set_is_local
     procedure :: count_subcom => fm_count_subcom
     procedure :: getfieldeq_nogath => fm_getfieldeq_nogath
     procedure :: getfieldeq1_nogath => fm_getfieldeq1_nogath
     procedure, nopass :: update_fields => fm_update_fields
     procedure, nopass :: update_fields_newstep => fm_update_fields_newstep
  end type fieldmat_type

  !> This is the type which controls/organises the parallel
  !! data decomposition etc.
  type, private :: pc_type
     integer, dimension(:,:), allocatable :: is_local !< Is the given it,ik on this proc?
     !NOTE: We have is_local as integer rather than logical so we can reduce across procs
     !to count how many procs have a given cell
     type(comm_type), dimension(:,:), allocatable :: itik_subcom !< Cell level sub-communicators
     integer, dimension(:,:), allocatable :: nproc_per_cell !< How many procs have this cell
     integer, dimension(:,:), allocatable :: nresp_per_cell !< How many rows are we personally responsible for in each cell
     integer, dimension(:,:), allocatable :: navail_per_cell !< How many rows could we be responsible for in each cell
     integer :: nresp_tot, navail_tot !< Total number of rows available/potentially available
     !> If true then we force local inversion of the local fields. This may impact
     !! diagnostics/parallel writing as it means only the processors gf_lo local section
     !! of the fields are evolved on this processor.
     !! @NOTE If we're doing force local invert really when we populate the supercell we
     !! should store all of the data locally so we don't need to fetch it later.
     logical :: force_local_invert=.false.
   contains
     private
     procedure :: deallocate => pc_deallocate
     procedure :: allocate => pc_allocate
     procedure :: debug_print => pc_debug_print
     procedure :: current_nresp => pc_current_nresp
     procedure :: find_locality => pc_find_locality
     procedure :: count_avail => pc_count_avail
     procedure :: has_ik => pc_has_ik
     procedure :: has_it => pc_has_it
     procedure :: make_decomp => pc_make_decomp
     procedure :: init => pc_init
     procedure :: reset => pc_reset
     !//The different decomposition routines
     procedure :: decomp => pc_decomp
  end type pc_type

  !//////////////////////////////
  !// MODULE VARIABLES
  !//////////////////////////////
  integer :: dlun=6 !< File unit for debug printing
  !Tuning this can improve advance/init time (usually one at the expense of other)
  logical,parameter :: debug=.false. !< Do we do debug stuff?
  logical :: initialised=.false. !< Have we initialised yet?
  logical :: reinit=.false. !< Are we reinitialising?
  logical :: dump_response=.false.
!< Do we dump the response matrix?
  logical :: read_response=.false. !< Do we read the response matrix from dump?
  logical :: field_local_allreduce_sub = .false. !< If true and field_local_allreduce true then do two sub comm all reduces rather than 1
  integer :: nfield !< How many fields
  type(pc_type),save :: pc !< This is the parallel control object
  type(fieldmat_type),save :: fieldmat !< This is the top level field matrix object

contains

  !//////////////////////////////
  !// CUSTOM TYPES
  !//////////////////////////////
  !
  !================
  !ROWBLOCK
  !================

  !> Reserve the matrix storage and the field-update work space for this
  !! row block, leaving both zeroed so the block starts in a defined state.
  subroutine rb_allocate(self)
    implicit none
    class(rowblock_type), intent(inout) :: self
    !Matrix data is held transposed, as data(ncol,nrow)
    allocate(self%data(self%ncol,self%nrow), self%tmp_sum(self%nrow))
    self%tmp_sum = 0.
    self%data = 0.
  end subroutine rb_allocate

  !> Release any storage currently held by this row block.
  subroutine rb_deallocate(self)
    implicit none
    class(rowblock_type), intent(inout) :: self
    if(allocated(self%tmp_sum)) deallocate(self%tmp_sum)
    if(allocated(self%data)) deallocate(self%data)
  end subroutine rb_deallocate

  !> Initialise the members of the rowblock instance: record which field
  !! equation this block represents, the indices of its parent objects and
  !! the column range it covers on the extended domain.
  subroutine rb_init(self, ifq, ic, it, is, ik, ncol, nextend)
    use theta_grid, only: ntgrid
    implicit none
    class(rowblock_type), intent(in out) :: self
    integer, intent(in) :: ifq, ic, it, is, ik, ncol
    !> The length of the parent supercell's real space
    !> domain. Represents the spacing between the start of different
    !> field/field equations as we store data as the full real space
    !> domain for each field, e.g. [fieldeq(:), fieldeqa(:),
    !> fieldeqp(:)], rather than say interleaved, e.g. [[fieldeq(1),
    !> fieldeqa(1), fieldeqp(1)], [fieldeq(2), fieldeqa(2),
    !> fieldeqp(2)],...]
    integer, intent(in) :: nextend

    !Record the parent hierarchy first (ky -> supercell -> cell), then
    !which field equation this block represents.
    self%ik_ind = ik
    self%is_ind = is
    self%it_ind = it
    self%ic_ind = ic
    self%ir_ind = ifq

    !Column extent: cells are offset by one theta-cell width (2*ntgrid)
    !and successive field equations by the full extended domain length.
    self%ncol = ncol
    self%col_llim = 1 + (ic - 1) * (2 * ntgrid) + (ifq - 1) * nextend
    self%col_ulim = self%col_llim + ncol - 1
  end subroutine rb_init

  !> Debug printing of this row block's index and extents.
  subroutine rb_debug_print(self)
    implicit none
    class(rowblock_type), intent(in) :: self
    write(dlun,'("Rowblock debug print. Index ir=",I0," nrow=",I0," ncol=",I0," rl,ru=",I0," ",I0, " and cl,cu=",I0," ",I0)') &
         self%ir_ind, self%nrow, self%ncol, &
         self%row_llim, self%row_ulim, &
         self%col_llim, self%col_ulim
  end subroutine rb_debug_print

  !> Matrix-vector product of this block with the passed vector,
  !! returned as a function result of length nrow.
  function rb_mv_mult_fun(self,vect)
    implicit none
    class(rowblock_type), intent(in) :: self
    complex,dimension(self%ncol), intent(in) :: vect
    complex,dimension(self%nrow) :: rb_mv_mult_fun
    !data is stored transposed (ncol,nrow), so form vect.data rather
    !than data.vect - matmul treats a 1d argument on the left as a row
    !vector, which gives exactly the product we want here.
    rb_mv_mult_fun = matmul(vect, self%data)
  end function rb_mv_mult_fun

  !> Matrix-vector product of this block, with the result cached
  !! in the tmp_sum work space.
  subroutine rb_mv_mult(self,vect)
    implicit none
    class(rowblock_type), intent(inout) :: self
    complex,dimension(self%ncol), intent(in) :: vect
    self%tmp_sum = self%mv_mult_fun(vect)
  end subroutine rb_mv_mult

  !> Map an extended-domain row index onto this block's local row index.
  function rb_irex_to_irloc(self,irex)
    implicit none
    class(rowblock_type), intent(in) :: self
    integer, intent(in) :: irex
    integer :: rb_irex_to_irloc
    rb_irex_to_irloc = irex - self%row_llim + 1
  end function rb_irex_to_irloc

  !> Derive nrow from the currently stored row limits. A non-positive
  !! upper limit marks a block with no rows on this processor.
  subroutine rb_set_nrow(self)
    implicit none
    class(rowblock_type), intent(inout) :: self
    if (self%row_ulim > 0) then
       self%nrow = 1 + self%row_ulim - self%row_llim
    else
       self%nrow = 0
    endif
  end subroutine rb_set_nrow

  !> Return the row block to its unallocated state with zeroed row limits.
  subroutine rb_reset(self)
    implicit none
    class(rowblock_type), intent(inout) :: self
    call self%deallocate
    self%row_ulim = 0
    self%row_llim = 0
  end subroutine rb_reset
!------------------------------------------------------------------------

  !================
  !CELL
  !================

  !> Allocate storage space: the cell level work space plus each
  !! child row block's storage.
  subroutine c_allocate(self)
    implicit none
    class(cell_type), intent(inout) :: self
    integer :: ir
    allocate(self%tmp_sum(self%nrow))
    do ir=1,self%nrb
       call self%rb(ir)%allocate
    enddo
  end subroutine c_allocate

  !> Deallocate storage space. Note deallocating rb finalises the
  !! row block components as well.
  subroutine c_deallocate(self)
    implicit none
    class(cell_type), intent(inout) :: self
    if(allocated(self%tmp_sum)) deallocate(self%tmp_sum)
    if(allocated(self%rb)) deallocate(self%rb)
  end subroutine c_deallocate

  !> Initialise the members of the cell instance: store the cell/parent
  !! indices and sizes, then create and initialise one row block per field.
  !!
  !! Note the dummy argument nfield shadows the module-level nfield here.
  subroutine c_init(self, ic, it, is, ik, nfield, nrow, nextend, ignore_boundary)
    use theta_grid, only: ntgrid
    implicit none
    class(cell_type), intent(in out) :: self
    integer, intent(in) :: ic, it, is, ik, nfield, nrow, nextend
    logical, intent(in) :: ignore_boundary
    integer :: ifq
    ! Store basic cell properties
    self%nrb = nfield
    self%ic_ind = ic
    self%it_ind = it
    self%ignore_boundary = ignore_boundary
    ! Store parent properties
    self%is_ind = is
    self%ik_ind = ik
    ! Store related/derived properties. Row limits span the whole
    ! supercell row range (see the NOTE in the type definition).
    self%row_llim = 1
    self%row_ulim = nrow
    self%nrow = nrow
    ! A cell covers 2*ntgrid theta points, plus the rightmost boundary
    ! point when that is not being ignored.
    self%ncol = 2 * ntgrid
    if (.not. ignore_boundary) self%ncol = self%ncol + 1
    allocate (self%col_llim(nfield), self%col_ulim(nfield))
    do ifq = 1, nfield
       ! NOTE(review): this formula differs from rb_init's column limits,
       ! which use (ic-1)*(2*ntgrid) + (ifq-1)*nextend. Here the cell offset
       ! is added rather than multiplied and the field stride is nrow, not
       ! nextend. Possibly a bug (or a transcription garble) - confirm
       ! against upstream; these cell-level limits may be unused (see the
       ! "probably don't want these" note on the type definition).
       self%col_llim(ifq) = 1 + (ic - 1) + (2 * ntgrid) + (ifq - 1) * nrow
       self%col_ulim(ifq) = self%col_llim(ifq) + self%ncol - 1
    end do
    self%ncol_tot = self%ncol * nfield
    allocate (self%rb(self%nrb))
    !Now make row block objects
    do ifq = 1, nfield
       call self%rb(ifq)%init(ifq, ic, it, is, ik, self%ncol, nextend)
    end do
  end subroutine c_init

  !> Debug printing of this cell's index and extents.
  subroutine c_debug_print(self)
    implicit none
    class(cell_type), intent(in) :: self
    write(dlun,'("Cell debug print. Index ic=",I0," nrow=",I0," ncol=",I0)') &
         self%ic_ind, self%nrow, self%ncol
  end subroutine c_debug_print

  !> Do matrix vector multiplication at rowblock level for the
  !! ifq'th field equation.
  subroutine c_mv_mult_rb(self,vect,ifq)
    implicit none
    class(cell_type), intent(inout) :: self
    complex, dimension(self%ncol), intent(in) :: vect
    integer, intent(in) :: ifq
    call self%rb(ifq)%mv_mult(vect)
  end subroutine c_mv_mult_rb

  !> Get the field update for this cells data.
  !! Note still need to reduce across other cells in this
  !! supercell.
  !!
  !! The ifq counter maps each enabled field (phi, apar, bpar, in that
  !! order) onto the corresponding row block; iterating with the
  !! module-level nfield assumes nfield equals the number of enabled
  !! fields counted here.
  subroutine c_get_field_update(self,fq,fqa,fqp)
    use run_parameters, only: has_phi, has_apar, has_bpar
    implicit none
    class(cell_type), intent(inout) :: self
    complex, dimension(self%ncol), intent(in) :: fq,fqa,fqp
    integer :: ifq
    !If we don't have any data then exit
    if(self%is_empty) return
    !First do the multiplication at rowblock level
    ifq=0
    if(has_phi) then
       ifq=ifq+1
       call self%mv_mult_rb(fq,ifq)
    endif
    if(has_apar) then
       ifq=ifq+1
       call self%mv_mult_rb(fqa,ifq)
    endif
    if(has_bpar) then
       ifq=ifq+1
       call self%mv_mult_rb(fqp,ifq)
    endif
    !Now store at cell level: accumulate each row block's (negated)
    !contribution into the rows it owns.
    self%tmp_sum=0
    do ifq = 1, nfield
       self%tmp_sum(self%rb(ifq)%row_llim:self%rb(ifq)%row_ulim)=&
            self%tmp_sum(self%rb(ifq)%row_llim:self%rb(ifq)%row_ulim)&
            -self%rb(ifq)%tmp_sum
    enddo
  end subroutine c_get_field_update

  !> A routine to reset the object: reset all row blocks then free
  !! this cell's own storage.
  subroutine c_reset(self)
    implicit none
    class(cell_type), intent(inout) :: self
    integer :: ir
    !Call reset on all children
    do ir=1,self%nrb
       call self%rb(ir)%reset
    enddo
    !deallocate
    call self%deallocate
    !Could zero out variables but no real need
  end subroutine c_reset

  !> Set the locality of each object
  subroutine c_set_locality(self)
    use kt_grids, only: aky, akx
    implicit none
    class(cell_type), intent(inout) :: self
    !Set our locality. Note here we assume all rb have same size
    if(size(self%rb).gt.0)then
       self%is_empty=self%rb(1)%nrow.eq.0
       self%is_all_local=self%rb(1)%nrow.eq.self%nrow
    else
       self%is_empty=.true.
       self%is_all_local=.false.
    endif
    !/Also ignore the ky=kx=0 mode (treated as empty everywhere)
    self%is_empty=(self%is_empty.or.(abs(aky(self%ik_ind)).lt.epsilon(0.0).and.abs(akx(self%it_ind)).lt.epsilon(0.0)))
    !is_local comes from the parallel-control object's decomposition map
    self%is_local=pc%is_local(self%it_ind,self%ik_ind).eq.1
  end subroutine c_set_locality

  !> Test if a given row belongs to the current cell
  function c_has_row(self,irow)
    implicit none
    class(cell_type), intent(in) :: self
    integer, intent(in) :: irow
    logical :: c_has_row
    !NOTE: Here we assume all row blocks in the cell have the same
    !row limits
    if(size(self%rb).gt.0)then
       c_has_row=(irow.ge.self%rb(1)%row_llim.and.irow.le.self%rb(1)%row_ulim)
    else
       c_has_row=.false.
    endif
  end function c_has_row

  !------------------------------------------------------------------------

  !================
  !SUPERCELL
  !================

  !> Allocate storage space: the supercell level work space plus each
  !! child cell's storage.
  subroutine sc_allocate(self)
    implicit none
    class(supercell_type), intent(inout) :: self
    integer :: ic
    allocate(self%tmp_sum(self%nrow))
    do ic=1,self%ncell
       call self%cells(ic)%allocate
    enddo
  end subroutine sc_allocate

  !> Deallocate storage space. Deallocating cells finalises the child
  !! cell components as well.
  subroutine sc_deallocate(self)
    implicit none
    class(supercell_type), intent(inout) :: self
    if(allocated(self%tmp_sum)) deallocate(self%tmp_sum)
    if(allocated(self%cells)) deallocate(self%cells)
  end subroutine sc_deallocate

  !> Initialise the supercell instance by setting and calculating
  !> some basic properties. Does not deal with allocating all
  !> storage etc.
subroutine sc_init(self, is, itmin, ik, nfield, nbound)
    use theta_grid, only: ntgrid
    use dist_fn, only: itright, supercell_labels, n_cells
    implicit none
    class(supercell_type), intent(in out) :: self
    integer, intent(in) :: is, ik, itmin, nfield
    !> Running count of ignored boundary points; incremented here for
    !> every cell whose boundary column is dropped.
    integer, intent(in out) :: nbound
    integer :: it, ic
    logical :: ignore_boundary
    ! Store supercell/parent indices and look up the connection data
    ! provided by dist_fn for this (itmin, ik) chain.
    self%is_ind = is
    self%it_leftmost = itmin
    self%ncell = n_cells(itmin, ik)
    self%ik_ind = ik
    self%is_label = supercell_labels(itmin, ik)
    ! Extended domain: ncell cells of 2*ntgrid points plus the single
    ! shared rightmost boundary point; one block of that per field.
    self%nextend = 1 + 2 * ntgrid * self%ncell
    self%nrow = self%nextend * nfield
    self%ncol = self%nrow
    allocate (self%initialised(self%nextend))
    self%initialised = .false.
    ! Now setup cells, walking rightwards along the connected chain
    allocate (self%cells(self%ncell))
    it = itmin
    do ic = 1, self%ncell
       !We ignore the boundary for all cells except the rightmost
       ignore_boundary = ic < self%ncell
       if (ignore_boundary) nbound = nbound + 1
       call self%cells(ic)%init( ic, it, is, ik, nfield, &
            self%nrow, self%nextend, ignore_boundary)
       !Update it to the next cell to the right (skip after the last cell)
       if (ic < self%ncell) it = itright(it, ik)
    end do
  end subroutine sc_init

  !> Debug printing of this supercell's index and extents.
  subroutine sc_debug_print(self)
    implicit none
    class(supercell_type), intent(in) :: self
    write(dlun,'("Supercell debug print. Index is=",I0," nrow=",I0," ncol=",I0)') &
         self%is_ind, self%nrow, self%ncol
  end subroutine sc_debug_print

  !> Get the field update: trigger the cell-level updates for every cell
  !! in this supercell (passing each cell its own it column of the field
  !! equation arrays), then reduce the partial sums across cells.
  subroutine sc_get_field_update(self,fq,fqa,fqp)
    implicit none
    class(supercell_type), intent(inout) :: self
    complex, dimension(:,:), intent(in) :: fq,fqa,fqp
    integer :: ic, it_ind, ulim
    !Exit if we don't have any of the data locally
    if(.not.self%is_local) return
    if(.not.self%is_empty)then
       !Now loop over cells and trigger field update
       do ic=1,self%ncell
          !Get cell properties
          it_ind=self%cells(ic)%it_ind
          ulim=self%cells(ic)%ncol
          !Do update
          call self%cells(ic)%get_field_update(fq(:ulim,it_ind),fqa(:ulim,it_ind),fqp(:ulim,it_ind))
       enddo
    endif
    !Now we need to reduce across cells (even empty procs take part in
    !the sub-communicator reduction inside reduce_tmpsum)
    call self%reduce_tmpsum
  end subroutine sc_get_field_update

  !> Reduce the field update across cells to give the final answer.
  !!
  !! NOTE(review): tmp_sum is accumulated onto here without being zeroed
  !! in this routine - assumes it was zeroed elsewhere before this call
  !! (not visible in this chunk); confirm. sum_allreduce_sub is imported
  !! but not used in this routine.
  subroutine sc_reduce_tmpsum(self)
    use mp, only: sum_allreduce_sub, sum_reduce_sub
    implicit none
    class(supercell_type), intent(inout) :: self
    integer :: ic
    !NOTE: Here we rely on the cells having not reduced/
    !gathered their data yet so there are no repeated rows.
    !First do local sums over the non-empty cells
    do ic=1,self%ncell
       if(self%cells(ic)%is_empty) cycle
       self%tmp_sum=self%tmp_sum+self%cells(ic)%tmp_sum
    enddo
    !Then reduce onto the head of the partial-data sub-communicator,
    !unless we hold nothing (nothing to contribute) or everything
    !(no other procs to reduce with).
    if(.not.(self%is_empty.or.self%is_all_local)) then
       call sum_reduce_sub(self%tmp_sum,0,self%sc_sub_pd)
    end if
  end subroutine sc_reduce_tmpsum

  !> Convert the extended domain index to ig, it and ifl
  !!
  !! @note The iex_to routines all need testing!
subroutine sc_iex_to_dims(self,iex,ig,ic,it,ifl) use theta_grid, only: ntgrid implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: iex integer, intent(out) :: ig,ic,it,ifl !Get field index ifl=1+int((iex-1)/self%nextend) !Get cell and it index ic=min(self%ncell,1+int((iex-(ifl-1)*self%nextend-1)/(2*ntgrid))) it=self%cells(ic)%it_ind !Get ig index ig=-ntgrid+iex-(ifl-1)*self%nextend-1-2*ntgrid*(ic-1) end subroutine sc_iex_to_dims !> Convert the extended domain index to ifl subroutine sc_iex_to_ifl(self,iex,ifl) implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: iex integer, intent(out) :: ifl ifl=1+(iex-1)/self%nextend end subroutine sc_iex_to_ifl !> Convert the extended domain index to ic subroutine sc_iex_to_ic(self,iex,ic) use theta_grid, only: ntgrid implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: iex integer, intent(out) :: ic integer :: ifl, iex_tmp !First work out how many fields there are call self%iex_to_ifl(iex,ifl) !Ensure we're looking between 1 and self%nextend iex_tmp=iex-(ifl-1)*self%nextend ic=MIN(int((iex_tmp-1)/(2*ntgrid))+1,self%ncell) end subroutine sc_iex_to_ic !> Convert the extended domain index to it subroutine sc_iex_to_it(self,iex,it) use theta_grid, only: ntgrid implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: iex integer, intent(out) :: it integer :: ifl, iex_tmp, ic !First work out how many fields there are call self%iex_to_ifl(iex,ifl) !Ensure we're looking between 1 and self%nextend iex_tmp=iex-(ifl-1)*self%nextend ic=MIN(int((iex_tmp-1)/(2*ntgrid))+1,self%ncell) it=self%cells(ic)%it_ind end subroutine sc_iex_to_it !> Convert the extended domain index to ig subroutine sc_iex_to_ig(self,iex,ig) use theta_grid, only: ntgrid implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: iex integer, intent(out) :: ig integer :: iex_tmp, ifl !First get ifl call self%iex_to_ifl(iex,ifl) !Limit to 1 and 
self%nextend iex_tmp=iex-(ifl-1)*self%nextend if(iex_tmp.eq.self%nextend) then ig=ntgrid else ig=-ntgrid+mod(iex_tmp-1,2*ntgrid) endif end subroutine sc_iex_to_ig !>Is the passed it a member of this supercell function sc_has_it(self,it) implicit none class(supercell_type), intent(in) :: self integer, intent(in) :: it logical :: sc_has_it sc_has_it=any(self%cells%it_ind.eq.it) end function sc_has_it !> A routine to reset the object subroutine sc_reset(self) use mp, only: free_comm implicit none class(supercell_type), intent(inout) :: self integer :: ic !Call reset on all children do ic=1,self%ncell call self%cells(ic)%reset enddo !AJ Free up communicators created for this supercell if(self%sc_sub_not_full%nproc.gt.0) then call free_comm(self%sc_sub_not_full) end if if(self%sc_sub_all%nproc.gt.0 .and. self%sc_sub_all%id.ne.self%parent_sub%id) then call free_comm(self%sc_sub_all) end if if(self%sc_sub_pd%nproc.gt.0) then call free_comm(self%sc_sub_pd) end if !deallocate call self%deallocate !Could zero out variables but no real need end subroutine sc_reset !> Set the locality of each object subroutine sc_set_locality(self) implicit none class(supercell_type), intent(inout) :: self integer :: ic !Set the locality of children do ic=1,self%ncell call self%cells(ic)%set_locality enddo !Set our locality self%is_empty=all(self%cells%is_empty) self%is_local=any(self%cells%is_local) self%is_all_local=all(self%cells%is_all_local) end subroutine sc_set_locality !> Given an it value get the it of the left connected cell function sc_get_left_it(self,it) implicit none class(supercell_type),intent(in) :: self integer,intent(in)::it integer :: sc_get_left_it, ic, tmp !Default to no connection tmp=-1 !Look for early exit if((.not.self%has_it(it)).or.(self%ncell.eq.1)) then tmp=-1 else !Now see which cell has this it do ic=1,self%ncell if(self%cells(ic)%it_ind.eq.it) then tmp=ic-1 exit endif enddo !Now set appropriate value if(tmp.le.0) then tmp=-1 else tmp=self%cells(tmp)%it_ind 
endif endif !Update result sc_get_left_it=tmp end function sc_get_left_it !> Debug routine to dump the current supercell subroutine sc_dump(self,prefix) use mp, only: proc0 use optionals, only: get_option_with_default implicit none class(supercell_type), intent(inout) :: self character(len=*), optional, intent(in) :: prefix character(len=80) :: fname integer :: lun=24 complex, dimension(:,:), allocatable :: tmp !Make filename write(fname,'(A,"sc_is_",I0,"_ik_",I0,".dat")') & trim(get_option_with_default(prefix, '')), self%is_ind, self%ik_ind !Allocate array and fetch data allocate(tmp(self%ncol,self%nrow)) !Fetch data call self%pull_rows_to_arr(tmp) !Now we've fetched data we can write if(proc0) then !Write open(unit=lun,file=fname,status='replace',form="unformatted") write(lun) tmp close(lun) endif !Free memory deallocate(tmp) end subroutine sc_dump !> Store the field equations at row level subroutine sc_store_fq(self,fq,fqa,fqp,ifl_in,it_in,ig_in) use theta_grid, only: ntgrid use run_parameters, only: has_phi, has_apar, has_bpar implicit none class(supercell_type), intent(inout) :: self complex, dimension(:,:,:), intent(in) :: fq, fqa, fqp integer, intent(in) :: ifl_in, it_in, ig_in integer :: ic, ic_in, irow, ifq, ulim, ir !If we don't have anything in this supercell then exit if(self%is_empty) return !Find out which cell has our it ic_in=0 do ic=1,self%ncell if(self%cells(ic)%it_ind.eq.it_in) then ic_in=ic exit endif enddo !Work out the row we want to put data in irow=(ig_in+ntgrid+1)+(ic_in-1)*(2*ntgrid)+(ifl_in-1)*self%nextend !Loop over cells do ic=1,self%ncell !If we don't have this cell cycle if(self%cells(ic)%is_empty) then cycle endif !Check if we have this row, if not cycle if(.not.self%cells(ic)%has_row(irow)) then cycle endif !Find upper limit of column ulim=self%cells(ic)%ncol ifq=0 if(has_phi)then !Increment counter ifq=ifq+1 !Convert extended irow to local ir=self%cells(ic)%rb(ifq)%irex_to_ir(irow) !Store data 
self%cells(ic)%rb(ifq)%data(:,ir)=fq(:ulim,self%cells(ic)%it_ind,self%cells(ic)%ik_ind) endif if(has_apar)then !Increment counter ifq=ifq+1 !Convert extended irow to local ir=self%cells(ic)%rb(ifq)%irex_to_ir(irow) !Store data self%cells(ic)%rb(ifq)%data(:,ir)=fqa(:ulim,self%cells(ic)%it_ind,self%cells(ic)%ik_ind) endif if(has_bpar)then !Increment counter ifq=ifq+1 !Convert extended irow to local ir=self%cells(ic)%rb(ifq)%irex_to_ir(irow) !Store data self%cells(ic)%rb(ifq)%data(:,ir)=fqp(:ulim,self%cells(ic)%it_ind,self%cells(ic)%ik_ind) endif enddo end subroutine sc_store_fq !> Prepare the field matrix for calculating field updates subroutine sc_prepare(self,prepare_type) use mp, only: mp_abort implicit none class(supercell_type), intent(inout) :: self integer, intent(in) :: prepare_type character (len=40) :: errmesg !Exit early if we're empty if(self%is_empty) return if(.not.self%is_local) return !Need to decide what method we're using to prepare the !field matrix select case(prepare_type) case(0) !Invert call self%invert case default write(errmesg,'("ERROR: Invalid prepare_type : ",I0)') prepare_type call mp_abort(trim(errmesg)) end select end subroutine sc_prepare !> A routine to invert the field matrix subroutine sc_invert(self) use job_manage, only: time_message use fields_arrays, only: time_field_invert, time_field_invert_mpi use mp, only: get_mp_times implicit none class(supercell_type), intent(inout) :: self real :: mp_total, mp_total_after if(self%is_empty) return if(.not.self%is_local) return call time_message(.false.,time_field_invert,' Field Matrix Invert') call get_mp_times(total_time = mp_total) !If we have all of the supercell local then use an explictly local !matrix inversion !Note could have a variable which forces local inversion if(self%is_all_local.or.pc%force_local_invert) then call self%invert_local else call self%invert_mpi endif call time_message(.false.,time_field_invert,' Field Matrix Invert') call get_mp_times(total_time = 
mp_total_after) time_field_invert_mpi = time_field_invert_mpi + (mp_total_after - mp_total) end subroutine sc_invert !> A routine to invert the field matrix locally subroutine sc_invert_local(self) use mat_inv, only: invert_serial use mp, only: broadcast_sub use file_utils, only: error_unit use gs2_time, only: code_dt implicit none class(supercell_type), intent(inout) :: self complex, dimension(:,:), allocatable :: tmp_arr real :: condition_number real, parameter :: condition_number_warning_limit = sqrt(1.0/epsilon(condition_number)) character(len = 100) :: condition_number_message !Exit early if possible if(self%is_empty) return if(.not.self%is_local) return !Allocate temporary array allocate(tmp_arr(self%ncol,self%nrow)) !First have to pull all row level data call self%pull_rows_to_arr(tmp_arr) !It may be better to hook into the GK_VERBOSITY level system !rather than relying on the debug flag. if (self%is_head) then condition_number = l2norm(tmp_arr) end if !Now invert in place if (self%is_head) then call invert_serial(tmp_arr, self%nrow) tmp_arr=transpose(tmp_arr) end if !It would be quite nice if we could do the following in a non-blocking manner !but this would require us to either keep the potentially large tmp_arr allocated !for all supercells or for us to be able to do this "in-place" (i.e. we use something !like non-blocking scatterv rather than broadcast). if (.not. 
self%is_all_local) then if (self%sc_sub_pd%nproc > 0) call broadcast_sub(tmp_arr, & self%head_iproc_pd, self%sc_sub_pd%id) end if if (self%is_head) then condition_number = condition_number * l2norm(tmp_arr) write(condition_number_message,'("Condition number for response with ik = ",I0,", is = ",I0," and dt = ",E14.6E3," is ",E14.6E3)') self%ik_ind, self%is_ind, code_dt, condition_number if (condition_number > condition_number_warning_limit) then write(error_unit(), '("Warning: ",A)') trim(condition_number_message) end if !In debug mode always display the condition number message to screen if (debug) then write(*,'(A)') trim(condition_number_message) end if end if !Now push back to row level call self%push_arr_to_rows(tmp_arr) !Free memory deallocate(tmp_arr) end subroutine sc_invert_local !> A routine to invert the field matrix using mpi subroutine sc_invert_mpi(self) use mp, only: mp_abort implicit none class(supercell_type), intent(inout) :: self self%is_empty=.true. !Until implemented make everything be treated as empty. call mp_abort("ERROR: Invert with mpi not yet implemented.") end subroutine sc_invert_mpi !> A routine to collect all the row level data and store in passed array !! Gather the row blocks up for this cell to fill an array subroutine sc_pull_rows_to_arr(self,arr) !AJ Nonblocking? use mp, only: sum_allreduce_sub implicit none class(supercell_type), intent(in) :: self complex, dimension(:,:), intent(out) :: arr !Note: Dimensions should be self%ncol, self%nrow integer :: ic, rl,ru, cl,cu, irb ! Zero out arr |--> Not needed if gathering ! This may look like it is only needed for the sum_allreduce_sub branch below, ! however without this line we would end up not setting arr in cases where we ! think of this supercell as empty. This is usually fine, but for the ky=kx=0 ! mode all processors consider the supercell to be empty, but we still need to ! initialise this array in some cases (e.g. when dumping to file). We could ! 
attempt to only do this zeroing in this special case to save some time, but ! we don't expect to be calling this routine much so opt for simpler approach. arr=0 !Now see if we want to exit if(self%is_empty) return if(.not.self%is_local) return !If we're all local then just do things simply as we don't need comms if(self%is_all_local) then do ic=1,self%ncell do irb=1,self%cells(ic)%nrb rl=self%cells(ic)%rb(irb)%row_llim ru=self%cells(ic)%rb(irb)%row_ulim cl=self%cells(ic)%rb(irb)%col_llim cu=self%cells(ic)%rb(irb)%col_ulim arr(cl:cu,rl:ru)=self%cells(ic)%rb(irb)%data enddo enddo else !Place local piece in arr do ic=1,self%ncell if(self%cells(ic)%is_empty) cycle do irb=1,self%cells(ic)%nrb rl=self%cells(ic)%rb(irb)%row_llim ru=self%cells(ic)%rb(irb)%row_ulim cl=self%cells(ic)%rb(irb)%col_llim cu=self%cells(ic)%rb(irb)%col_ulim arr(cl:cu,rl:ru)=self%cells(ic)%rb(irb)%data enddo enddo !Now use MPI to get the rest of the data from other procs !<DD>FOR NOW USE ALL_REDUCE AS EASIER, BUT SHOULD BE ABLE !TO DO THIS WITH ALL_GATHERV WHICH SHOULD BE FASTER if(self%sc_sub_pd%nproc.gt.0) call sum_allreduce_sub(arr,self%sc_sub_pd%id) endif end subroutine sc_pull_rows_to_arr !> A routine to distribute an array to appropriate row blocks subroutine sc_push_arr_to_rows(self,arr) implicit none class(supercell_type), intent(inout) :: self complex, dimension(self%nrow,self%ncol), intent(in) :: arr integer :: ic, ir integer :: rl,ru,cl,cu !If empty don't have anywhere to store data so exit if(self%is_empty) return !Store all local sections do ic=1,self%ncell if(self%cells(ic)%is_empty) cycle do ir=1,self%cells(ic)%nrb !Get row/col limits rl=self%cells(ic)%rb(ir)%row_llim ru=self%cells(ic)%rb(ir)%row_ulim cl=self%cells(ic)%rb(ir)%col_llim cu=self%cells(ic)%rb(ir)%col_ulim !Store data self%cells(ic)%rb(ir)%data=arr(cl:cu,rl:ru) enddo enddo end subroutine sc_push_arr_to_rows !> Create primary (top level) sub communicators subroutine sc_make_subcom_1(self) use mp, only: comm_type, split, 
free_comm implicit none class(supercell_type), intent(inout) :: self integer :: ic, colour type(comm_type) :: tmp !/First make a subcommunicator involving any procs !which can see this supercell colour=0 if(self%is_local)colour=1 call split(colour,tmp,self%parent_sub%id) !Note we only store the subcom for those procs which will use it !this ensures an error will occur if we try to use the subcom in !the wrong place if(self%is_local)then !Here we check if the new subcom has same number of members as !parent, if so we can carry on using the parent instead if(tmp%nproc.eq.self%parent_sub%nproc) then self%sc_sub_all=self%parent_sub call free_comm(tmp) else self%sc_sub_all=tmp endif else !If we can't see any of this object then !Destroy communicator call free_comm(tmp) !and return return endif !Now make the supercell sub communicators do ic=1,self%ncell !Set parent subcom self%cells(ic)%parent_sub=self%sc_sub_all enddo end subroutine sc_make_subcom_1 !> Create the secondary (intraobject) subcommunicators subroutine sc_make_subcom_2(self) use mp, only: comm_type, split, free_comm, sum_allreduce_sub, proc0, iproc implicit none class(supercell_type), intent(inout) :: self integer :: colour, key, cc type(comm_type) :: tmp logical :: some_empty, some_pd, some_all, head_notfull !Exit if we don't have this supercell if(.not.self%is_local)return !/Work out if we have any empty procs some_empty=.false. colour=0 if(self%is_empty)colour=1 call sum_allreduce_sub(colour,self%sc_sub_all%id) if(colour.ne.0) some_empty=.true. !/Work out if we have any partial data procs some_pd=.false. colour=0 if(.not.(self%is_empty.or.self%is_all_local))colour=1 call sum_allreduce_sub(colour,self%sc_sub_all%id) if(colour.ne.0) some_pd=.true. !/Work out if we have any all local procs some_all=.false. colour=0 if(self%is_all_local)colour=1 call sum_allreduce_sub(colour,self%sc_sub_all%id) if(colour.ne.0) some_all=.true. 
!/Now make a subcommunicator involving all procs !which have part of the supercell data range (but not all of it) if(some_pd)then colour=0 if((.not.(self%is_empty.or.self%is_all_local))) colour=1 call split(colour,tmp,self%sc_sub_all%id) if(colour.eq.0)then !/Destroy communicator call free_comm(tmp) else !/Store communicator self%sc_sub_pd=tmp endif endif !Decide if we're the head proc !Note: We also decide which proc is in charge of filling the empty !procs with data. By allowing this to be different from the head proc !we have a better chance of being able to overlap comms and computations !Note: We try to make proc0 the head proc if it is a member of the supercell !which has data as it means we don't need to send this supercell to anyone. if(some_all)then !Here we make a temporary communicator for the all_local procs cc=0 if(self%is_all_local) cc=1 !/Note we don't destroy this until later call split(cc,tmp,self%sc_sub_all%id) !Now find out if we have proc0 in our new comm if(self%is_all_local)then cc=0 if(proc0) cc=1 call sum_allreduce_sub(cc,tmp%id) !Decide if we're the head, prefer proc0 to be head !if it's a member of our group if(cc.ne.0)then self%is_head=proc0 else self%is_head=tmp%proc0 endif !Here we decide which proc should be in charge of !informing the empty procs of the result head_notfull=(min(1,tmp%nproc-1).eq.tmp%iproc) else head_notfull=.false. 
endif !Free the communicator call free_comm(tmp) else if(self%sc_sub_pd%nproc.gt.0)then cc=0 if(proc0) cc=1 call sum_allreduce_sub(cc,self%sc_sub_pd%id) !Decide if we're the head, prefer proc0 to be head !if it's a member of our group if(cc.ne.0)then self%is_head=proc0 else self%is_head=self%sc_sub_pd%proc0 endif endif !Here we decide which proc should be in charge of !informing the empty procs of the result head_notfull=(min(1,self%sc_sub_pd%nproc-1).eq.self%sc_sub_pd%iproc) endif !Store the head_iproc self%head_iproc_world=0 if(self%is_head) self%head_iproc_world=iproc call sum_allreduce_sub(self%head_iproc_world,self%sc_sub_all%id) self%head_iproc=0 if(self%is_head) self%head_iproc=self%sc_sub_all%iproc call sum_allreduce_sub(self%head_iproc,self%sc_sub_all%id) self%head_iproc_pd=0 if(self%is_head) self%head_iproc_pd=self%sc_sub_pd%iproc call sum_allreduce_sub(self%head_iproc_pd,self%sc_sub_all%id) !/Now make a subcommunicator involving all empty procs and !head pd proc if(some_empty)then !/Default values colour=0 key=2 !/Join if we're empty and NOT proc0 !This is done as we handle the filling of proc0 !separately and by actively excluding it we aim to !prevent stalls waiting for proc0 to catch up if(self%is_empty.and.(.not.proc0)) colour=1 if(head_notfull) then colour=1 key=0 endif !Here we use key to make sure the proc with data is proc0 call split(colour,key,tmp,self%sc_sub_all%id) if(colour.eq.0)then !/Free communicator call free_comm(tmp) else if(tmp%nproc.gt.1)then !/Store communicator self%sc_sub_not_full=tmp !/Allocate array used to store non blocking !comm handles if(tmp%proc0) then allocate(self%nb_req_hand(tmp%nproc-1)) else allocate(self%nb_req_hand(1)) endif else call free_comm(tmp) endif endif endif end subroutine sc_make_subcom_2 !------------------------------------------------------------------------ !================ !KYBLOCK !================ !> Allocate storage space subroutine ky_allocate(self) implicit none class(ky_type), intent(inout) :: 
self
integer :: is
!Delegate allocation to each supercell owned by this ky block
do is=1,self%nsupercell
   call self%supercells(is)%allocate
enddo
end subroutine ky_allocate

!> Deallocate storage space
!! Releases the supercells array for this ky block.
subroutine ky_deallocate(self)
   implicit none
   class(ky_type), intent(inout) :: self
   if(allocated(self%supercells)) deallocate(self%supercells)
end subroutine ky_deallocate

!> Setup properties of ky_type instance
!! Records the ky index and supercell count, allocates the child
!! supercell objects and initialises each one.
!! @param ik         ky index this block represents
!! @param itmins     leftmost-it label for each supercell (only the
!!                   first nsupercell entries are meaningful)
!! @param nsupercell number of supercells at this ik
!! @param nfield     number of active fields (phi/apar/bpar)
!! @param nbound     passed through to the supercell init; intent(in out),
!!                   presumably accumulates the boundary-point count
!!                   (see self%nbound usage in fm_init) -- TODO confirm
subroutine ky_init(self, ik, itmins, nsupercell, nfield, nbound)
   implicit none
   class(ky_type), intent(in out) :: self
   integer, intent(in) :: ik, nsupercell, nfield
   integer, dimension(:), intent(in) :: itmins
   integer, intent(in out) :: nbound
   integer :: is
   self%ik_ind = ik
   self%nsupercell = nsupercell
   allocate (self%supercells(nsupercell))
   do is = 1, nsupercell
      !Each supercell is labelled by its position and its leftmost it
      call self%supercells(is)%init(is, itmins(is), ik, nfield, nbound)
   end do
end subroutine ky_init

!> Debug printing
!! Writes a one-line summary to dlun (module-level debug unit,
!! declared elsewhere in this module).
subroutine ky_debug_print(self)
   implicit none
   class(ky_type), intent(in) :: self
   write(dlun,'("Ky block debug print. Index ik=",I0," nsupercell=",I0)') &
        self%ik_ind, self%nsupercell
end subroutine ky_debug_print

!> Get the field update for this ik
!! fq/fqa/fqp are the field equations for this ik; the work is
!! delegated to every child supercell.
subroutine ky_get_field_update(self,fq,fqa,fqp)
   implicit none
   class(ky_type), intent(inout) :: self
   complex, dimension(:,:), intent(in) :: fq,fqa,fqp
   integer :: is
   !First trigger calculation of field update.
!Note: Empty supercells don't do anything
do is=1,self%nsupercell
   !/Calculate the field update
   call self%supercells(is)%get_field_update(fq,fqa,fqp)
enddo
end subroutine ky_get_field_update

!> Store the field equation at row level
!! Dispatches the field-equation data for the given (ifl,it,ig) point
!! to whichever supercell contains it_in.
subroutine ky_store_fq(self,fq,fqa,fqp,ifl_in,it_in,ig_in)
   implicit none
   class(ky_type), intent(inout) :: self
   complex, dimension(:,:,:), intent(in) :: fq, fqa, fqp
   integer, intent(in) :: ifl_in, it_in, ig_in
   integer :: is
   !If we don't have anything in this ky block then exit
   if(self%is_empty) return
   !Delegate work to the supercell which owns it_in; all others cycle
   do is=1,self%nsupercell
      if(.not.self%supercells(is)%has_it(it_in)) cycle
      call self%supercells(is)%store_fq(fq,fqa,fqp,ifl_in,it_in,ig_in)
   enddo
end subroutine ky_store_fq

!> Given it say what the supercell id is
!! Returns the index (into self%supercells) of the supercell which
!! contains the passed it, or -1 if no supercell has it.
function ky_is_from_it(self,it)
   implicit none
   class(ky_type), intent(inout) :: self
   integer, intent(in) :: it
   integer :: ky_is_from_it
   integer :: is
   ky_is_from_it=-1
   do is=1,self%nsupercell
      if(self%supercells(is)%has_it(it))then
         ky_is_from_it=is
         exit
      endif
   enddo
end function ky_is_from_it

!> A routine to reset the object
!! Resets all child supercells, frees this block's communicator where
!! it is distinct from the parent's, then deallocates storage.
subroutine ky_reset(self)
   use mp, only: free_comm, mp_comm_null
   implicit none
   class(ky_type), intent(inout) :: self
   integer :: is
   !Call reset on all children
   do is=1,self%nsupercell
      call self%supercells(is)%reset
   enddo
   !AJ Free communicators associated with this ky block
   !Only free when the subcom is not simply an alias of the parent's,
   !is populated on this proc, and is not the null communicator.
   if(self%ky_sub_all%id .ne. self%parent_sub%id .and. self%ky_sub_all%nproc .gt. 0 .and. self%ky_sub_all%id .ne.
mp_comm_null) then
   call free_comm(self%ky_sub_all)
end if
!deallocate
call self%deallocate
!Could zero out variables but no real need
end subroutine ky_reset

!> Set the locality of each object
!! First updates every supercell's locality flags, then derives this
!! block's flags as reductions over the children:
!!   is_empty     -- no child has data here
!!   is_local     -- at least one child has data here
!!   is_all_local -- every child is entirely local
subroutine ky_set_locality(self)
   implicit none
   class(ky_type), intent(inout) :: self
   integer :: is
   !Set the locality of children
   do is=1,self%nsupercell
      call self%supercells(is)%set_locality
   enddo
   !Set our locality
   self%is_empty=all(self%supercells%is_empty)
   self%is_local=any(self%supercells%is_local)
   self%is_all_local=all(self%supercells%is_all_local)
end subroutine ky_set_locality

!> Prepare the field matrix for calculating field updates
!! Passes prepare_type (e.g. 0 => invert, see sc_prepare) down to each
!! supercell. Does nothing on procs which hold none of this block.
subroutine ky_prepare(self,prepare_type)
   implicit none
   class(ky_type), intent(inout) :: self
   integer, intent(in) :: prepare_type
   integer :: is
   !Exit early if we're empty
   if(self%is_empty) return
   if(.not.self%is_local) return
   !Tell each supercell to prepare
   do is=1,self%nsupercell
      call self%supercells(is)%prepare(prepare_type)
   enddo
end subroutine ky_prepare

!> Create the primary subcommunicators
!! Mirrors sc_make_subcom_1: splits the parent communicator into the
!! group of procs which can see this ky block, reusing the parent
!! communicator when the membership is identical.
subroutine ky_make_subcom_1(self)
   use mp, only: comm_type, split, free_comm
   implicit none
   class(ky_type), intent(inout) :: self
   integer :: is, colour
   type(comm_type) :: tmp
   !/First make a subcommunicator involving any procs
   !which can see this ky block
   colour=0
   if(self%is_local)colour=1
   call split(colour,tmp,self%parent_sub%id)
   !Note we only store the subcom for those procs which will use it
   !this ensures an error will occur if we try to use the subcom in
   !the wrong place
   if(self%is_local)then
      !If the new subcom has the same number of members as the parent
      !then carry on using the parent and release the duplicate.
      if(tmp%nproc.eq.self%parent_sub%nproc) then
         self%ky_sub_all=self%parent_sub
         call free_comm(tmp)
      else
         self%ky_sub_all=tmp
      endif
   else
      !If we can't see any of this object then
      !/Destroy communicator
      call free_comm(tmp)
      !and return
      return
   endif
   !Now make the supercell sub communicators
   do is=1,self%nsupercell
      !Set parent subcom
      self%supercells(is)%parent_sub=self%ky_sub_all
      !Now make child subcom
      call self%supercells(is)%make_subcom_1
   enddo
end subroutine
ky_make_subcom_1

!> Create the secondary subcommunicators
!! Simply forwards to each supercell's make_subcom_2; the ky block
!! itself needs no further communicators at this level.
subroutine ky_make_subcom_2(self)
   implicit none
   class(ky_type), intent(inout) :: self
   integer :: is
   !Exit if we don't have this kyb
   if(.not.self%is_local)return
   !Now make the supercell sub communicators
   do is=1,self%nsupercell
      !Now make child subcom
      call self%supercells(is)%make_subcom_2
   enddo
end subroutine ky_make_subcom_2

!------------------------------------------------------------------------

!================
!FIELDMAT
!================

!> Allocate storage space
!! Allocates the supercell-head map on every proc, then (only on procs
!! which hold part of the field matrix) the per-ky storage.
subroutine fm_allocate(self)
   implicit none
   class(fieldmat_type), intent(inout) :: self
   integer :: ik
   !heads is needed on all procs (it is reduced globally in
   !fm_calc_sc_heads), hence allocated before the locality check
   allocate(self%heads(self%ntheta0, self%naky))
   if(.not.self%is_local) return
   do ik=1,self%naky
      call self%kyb(ik)%allocate
   enddo
end subroutine fm_allocate

!> Deallocate storage space
subroutine fm_deallocate(self)
   implicit none
   class(fieldmat_type), intent(inout) :: self
   if(allocated(self%kyb)) deallocate(self%kyb)
   if(allocated(self%heads)) deallocate(self%heads)
end subroutine fm_deallocate

!> Debug printing
!! Writes a one-line summary to dlun (module-level debug unit,
!! declared elsewhere in this module).
subroutine fm_debug_print(self)
   implicit none
   class(fieldmat_type), intent(in) :: self
   write(dlun,'("Field debug print. naky=",I0)') &
        self%naky
end subroutine fm_debug_print

!> Just work out the locality (also sets is_empty etc.
!! but is not intended for this)
!! Updates the locality flags of every ky block and then derives the
!! field-matrix level flags as reductions over the children.
subroutine fm_set_is_local(self)
   implicit none
   class(fieldmat_type), intent(inout) :: self
   integer :: ik
   !Set the locality of children
   do ik=1,self%naky
      call self%kyb(ik)%set_locality
   enddo
   !Set our locality:
   !  is_empty     -- no ky block has data on this proc
   !  is_local     -- at least one ky block has data on this proc
   !  is_all_local -- every ky block is entirely local to this proc
   self%is_empty=all(self%kyb%is_empty)
   self%is_local=any(self%kyb%is_local)
   !BUGFIX: previously this reduced over kyb%is_local, which only says
   !every block has *some* data here.  Reduce over kyb%is_all_local to
   !match the meaning of the flag (and the analogous ky_set_locality).
   self%is_all_local=all(self%kyb%is_all_local)
end subroutine fm_set_is_local

!> Initialise the field objects
!! Works out, for every ky, how many supercells exist and which it
!! labels each one (by its leftmost it), stores the basic sizes on
!! self and creates/initialises the per-ky child objects.
!! Side effect: sets the module-level nfield from the active fields
!! and accumulates self%nbound through the child init calls.
subroutine fm_init(self)
   use kt_grids, only: naky, ntheta0
   use dist_fn, only: get_leftmost_it, n_supercells
   use run_parameters, only: has_phi, has_apar, has_bpar
   use theta_grid, only: ntgrid
   implicit none
   class(fieldmat_type), intent(inout) :: self
   integer :: it, cur_id, it_start, is_count, ik
   integer, dimension(:), allocatable :: tmp_supercell_ids
   integer, dimension(:,:), allocatable :: supercell_ids
   integer, dimension(:,:), allocatable :: is_to_itmin
   !Count the fields
   nfield = count([has_phi, has_apar, has_bpar])
   !Allocate temp arrays
   allocate(supercell_ids(ntheta0, naky))
   ! Here the first dimension is allocated as ntheta0
   ! but for each ky we only use the first nsupercell(ik)
   ! entries.
   allocate(is_to_itmin(ntheta0, naky))
   !Initialise arrays
   is_to_itmin = 0
   supercell_ids = -1
   !Find the leftmost it for each supercell and use this to label the
   !supercell. Note that this is different from the supercell_label
   !from dist_fn.
   do ik = 1, naky
      do it = 1, ntheta0
         supercell_ids(it, ik) = get_leftmost_it(it, ik)
      end do
   end do
   !Now work out how many supercells there are for each
   !ik and what their properties are.
   do ik = 1, naky
      !Store the leftmost it values for current ik
      !(allocation-on-assignment sizes tmp_supercell_ids to ntheta0)
      tmp_supercell_ids = supercell_ids(:, ik)
      !Now loop over all it values and count how many
      !have which it as the leftmost one.
      it_start = 0
      is_count = 0
      do while (any(tmp_supercell_ids >= 0))
         !Increase the it value
         it_start = it_start + 1
         !Get the leftmost it for supercell with it_start in it
         cur_id = tmp_supercell_ids(it_start)
         !If we've seen this supercell then cycle
         if (cur_id == -1) cycle
         !Increase supercell counter
         is_count = is_count + 1
         !Store the leftmost it value
         is_to_itmin(is_count, ik) = cur_id
         !Set all matching entries to -1 so we know to skip
         !this supercell again
         do it = it_start, ntheta0
            if (tmp_supercell_ids(it) == cur_id) then
               tmp_supercell_ids(it) = -1
            end if
         end do
      end do
   end do
   !Free memory
   deallocate(tmp_supercell_ids)
   !Now we want to allocate the basic field structure
   !Store the basic properties
   self%naky = naky
   self%ntheta0 = ntheta0
   self%npts = naky * ntheta0 * (2 * ntgrid + 1)
   self%nbound = 0 !Calculated whilst we create objects
   allocate(self%kyb(naky))
   !/Make the kyb objects
   do ik = 1, naky
      call self%kyb(ik)%init(ik, is_to_itmin(:, ik), n_supercells(ik), nfield, self%nbound)
   end do
   !Free memory
   deallocate(supercell_ids, is_to_itmin)
end subroutine fm_init

!> Find the response of g to delta-fn field perturbations
!! and store at the row level
subroutine fm_populate(self)
   use run_parameters, only: has_phi, has_apar, has_bpar
   use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew
   use dist_fn_arrays, only: g
   use kt_grids, only: kwork_filter
   use array_utils, only: zero_array
   implicit none
   class(fieldmat_type), intent(inout) :: self
   integer :: ifl, pts_remain, ik, is
   !First initialise everything to 0
   !AJ This is also done in fields.f90 when arrays are created so we should be
   !AJ able to remove from here.
call zero_array(g)
call zero_array(phi) ; call zero_array(phinew)
call zero_array(apar) ; call zero_array(aparnew)
call zero_array(bpar) ; call zero_array(bparnew)
!Initialise field counter
ifl=0
!NOTE(review): the three branches below are identical apart from the
!target array (phinew/aparnew/bparnew) -- candidate for a private
!helper, left untouched here to preserve behaviour.
!Do phi
if(has_phi) then
   !Set the number of points left
   pts_remain=self%npts-self%nbound
   !Increment the field counter
   ifl=ifl+1
   !Reset the filter array
   kwork_filter=.false.
   !Reset the init state arrays
   self%kyb%initdone=.false.
   do ik=1,self%naky
      self%kyb(ik)%supercells%initdone=.false.
      do is=1,self%kyb(ik)%nsupercell
         self%kyb(ik)%supercells(is)%initialised=.false.
      enddo
   enddo
   !Now loop over all points and initialise
   !(each call sets a batch of delta-fns and decrements pts_remain)
   do while(pts_remain.gt.0)
      call self%init_next_field_points(phinew,pts_remain,kwork_filter,ifl)
   enddo
endif
!Do apar
if(has_apar) then
   !Set the number of points left
   pts_remain=self%npts-self%nbound
   !Increment the field counter
   ifl=ifl+1
   !Reset the filter array
   kwork_filter=.false.
   !Reset the init state arrays
   self%kyb%initdone=.false.
   do ik=1,self%naky
      self%kyb(ik)%supercells%initdone=.false.
      do is=1,self%kyb(ik)%nsupercell
         self%kyb(ik)%supercells(is)%initialised=.false.
      enddo
   enddo
   !Now loop over all points and initialise
   do while(pts_remain.gt.0)
      call self%init_next_field_points(aparnew,pts_remain,kwork_filter,ifl)
   enddo
endif
!Do bpar
if(has_bpar) then
   !Set the number of points left
   pts_remain=self%npts-self%nbound
   !Increment the field counter
   ifl=ifl+1
   !Reset the filter array
   kwork_filter=.false.
   !Reset the init state arrays
   self%kyb%initdone=.false.
   do ik=1,self%naky
      self%kyb(ik)%supercells%initdone=.false.
      do is=1,self%kyb(ik)%nsupercell
         self%kyb(ik)%supercells(is)%initialised=.false.
      enddo
   enddo
   !Now loop over all points and initialise
   do while(pts_remain.gt.0)
      call self%init_next_field_points(bparnew,pts_remain,kwork_filter,ifl)
   enddo
endif
!Reset the filter array
kwork_filter=.false.
end subroutine fm_populate

!> Initialise the next set of delta functions, find the response
!! and store in the appropriate rows.
subroutine fm_init_next_field_points(self,field,pts_remain,kwork_filter,ifl) use kt_grids, only: naky, ntheta0 use theta_grid, only: ntgrid use dist_fn, only: timeadv use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew use dist_fn_arrays, only: fieldeq, fieldeqa, fieldeqp use mp, only: barrier use array_utils, only: zero_array implicit none class(fieldmat_type), intent(inout) :: self complex, dimension(-ntgrid:ntgrid,ntheta0,naky), intent(inout) :: field integer, intent(inout) :: pts_remain integer, intent(in) :: ifl logical, dimension(ntheta0, naky), intent(inout) :: kwork_filter integer :: ik, is, iex, ic, iex_init, ig, it integer :: it_cur, ig_cur integer :: pts_set_here logical :: found_ig, found_it !AJ This is a nasty barrier, but it is in here as on ARCHER we have a problem !AJ that we get deadlock on large core counts without it. This has been investigated !AJ and looks like a problem with the Cray MPI library, but it's hard to get to the bottom of. !AJ The problem does not occur on small core counts (we see it between 2000 and 3000 cores) call barrier() pts_set_here=0 !Loop over all ik !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(ik, is, ic, iex_init, iex, ig, it) & !$OMP SHARED(self, kwork_filter, field, ntgrid) & !$OMP REDUCTION(+:pts_set_here) & !$OMP SCHEDULE(static) do ik = 1, self%naky !Technically should be able to skip ik values which are not local (though could be empty) !/would have to remember to decrease pts_remain !If we've finished this ky set the !filter and move on to next ik if (self%kyb(ik)%initdone) then kwork_filter(:, ik) = .true. cycle end if !Now look at all supercells with this ik do is = 1, self%kyb(ik)%nsupercell !If we've finished with this supercell !then set the filter and move on to next if (self%kyb(ik)%supercells(is)%initdone) then !Set filter for all it on supercell do ic = 1, self%kyb(ik)%supercells(is)%ncell kwork_filter(self%kyb(ik)%supercells(is)%cells(ic)%it_ind, ik) = .true. 
end do cycle end if !//If we get to this point then we know that we have at least !one grid point on supercell for which we've not made a delta-fn !Now look for the first point that we've not initialised iex_init = 0 do iex = 1, self%kyb(ik)%supercells(is)%nextend !If we've already done this point then cycle if (self%kyb(ik)%supercells(is)%initialised(iex)) cycle !Else this is the first point so store it and exit iex_init = iex exit end do !If this is the last point then mark supercell as done !but don't set filter yet, this will be automatically !applied on next loop if (iex_init == self%kyb(ik)%supercells(is)%nextend) then self%kyb(ik)%supercells(is)%initdone = .true. endif !Mark this point as initialised self%kyb(ik)%supercells(is)%initialised(iex_init) = .true. !Increment the number of points handled in this call pts_set_here = pts_set_here + 1 !Get the ig and it indices corresponding to this point call self%kyb(ik)%supercells(is)%iex_to_ig(iex_init,ig) call self%kyb(ik)%supercells(is)%iex_to_it(iex_init,it) !Initialise field field(ig, it, ik) = 1.0 !Set the duplicate boundary point if we're at the left !boundary of this call and we are not the leftmost cell in !the chain. if ((ig == -ntgrid) .and. & (it /= self%kyb(ik)%supercells(is)%it_leftmost)) then !Get the left connection it = self%kyb(ik)%supercells(is)%get_left_it(it) field(ntgrid, it, ik) = 1.0 end if end do !Ends loop over supercells !Decide if we've done this kyb self%kyb(ik)%initdone = all(self%kyb(ik)%supercells%initdone) end do !Ends loop over ik !$OMP END PARALLEL DO ! 
Update the number of points remaining pts_remain = pts_remain - pts_set_here !Now do the time advance call timeadv(phi,apar,bpar,phinew,aparnew,bparnew,0) !Now get the field equations call self%getfieldeq_nogath(phinew,aparnew,bparnew,fieldeq,fieldeqa,fieldeqp) !Now we need to store the field equations in the appropriate places !$OMP PARALLEL DO DEFAULT(none) & !$OMP PRIVATE(ik, ig_cur, it_cur, found_ig, found_it, it, is) & !$OMP SHARED(self, field, ntgrid, ntheta0, ifl, fieldeq, fieldeqa, fieldeqp) & !$OMP SCHEDULE(static) do ik = 1, self%naky !If no data then skip ik -- because we don't skip setting !the field in the first loop of this routine but we do skip !zeroing it out here on processors which don't have any interest !in this ky, we need to zero the field after this loop to !ensure it has been fully reset to 0 on all procs. We might !consider skipping the setting of the field in the first loop !instead, although this could mean the field is different on !different processors (which should be fine) and might complicate !how we track the number of points we need to visit etc. if (self%kyb(ik)%is_empty) cycle !Loop until we've done all supercells ! Note we dont use gs2_max_abs here as we are already inside ! an OpenMP region. do while (maxval(abs(field(: , : ,ik))) > 0) !Reset variables ig_cur = -ntgrid-1 it_cur = 0 found_ig = .false. found_it = .false. !Look for first entry which was set to a delta-fn theta0: do it = 1, ntheta0 ! Find the first theta grid point equal to 1.0 ig_cur = findloc(field(:, it, ik), cmplx(1.0, 0.0), dim = 1) found_ig = ig_cur > 0 ! If we didn't find a theta grid value move on to ! next it if (.not. found_ig) cycle ! Adjust ig_cur to correct index range (findloc ! assumes arrays are 1 indexed) ig_cur = ig_cur - (ntgrid + 1) if (found_ig) then it_cur = it found_it = .true. 
exit theta0 end if end do theta0 !Now check we've found a point, else cycle !NOTE: If this triggers we're probably in trouble !as nothing will be different on the next iteration !and hence we will be stuck in an infinite loop. !As such this should really trigger an abort. if (.not. found_it) cycle !Zero out the field at this point so we ignore !it on future iterations field(ig_cur, it_cur, ik)=0.0 !Find the appropriate supercell is = self%kyb(ik)%is_from_it(it_cur) !Also zero out the boundary point if needed if((ig_cur == -ntgrid) .and. & (it_cur /= self%kyb(ik)%supercells(is)%it_leftmost)) then it = self%kyb(ik)%supercells(is)%get_left_it(it_cur) field(ntgrid, it, ik) = 0.0 end if !Now store data call self%kyb(ik)%store_fq(fieldeq, fieldeqa, fieldeqp, ifl, it_cur, ig_cur) end do !Ends loop over points end do !Ends loop over ky !$OMP END PARALLEL DO ! See comment in above loop for why this is needed call zero_array(field) end subroutine fm_init_next_field_points !> Prepare the field matrix for calculating field updates subroutine fm_prepare(self) implicit none class(fieldmat_type), intent(inout) :: self integer :: ik !Exit early if we're empty if(self%is_empty) return if(.not.self%is_local) return !Tell each ky to prepare do ik=1,self%naky call self%kyb(ik)%prepare(self%prepare_type) enddo end subroutine fm_prepare !> A routine to calculate the update to the fields subroutine fm_get_field_update(self, phi, apar, bpar) use dist_fn, only: getfieldeq, getfieldeq_nogath use dist_fn_arrays, only: fieldeq, fieldeqa, fieldeqp implicit none class(fieldmat_type), intent(inout) :: self complex, dimension(:,:,:), intent(inout) :: phi,apar,bpar integer :: ik, is do ik=1,self%naky if(.not.self%kyb(ik)%is_local) cycle do is=1,self%kyb(ik)%nsupercell if(.not.self%kyb(ik)%supercells(is)%is_local) cycle self%kyb(ik)%supercells(is)%tmp_sum=0.0 enddo enddo !First get the field eq call self%getfieldeq_nogath(phi,apar,bpar,fieldeq,fieldeqa,fieldeqp) !Now get the field update for each 
ik do ik=1,self%naky !Skip non-local ky if(.not.self%kyb(ik)%is_local) cycle !Trigger the supercell field updates !AJ Non-blocking can be done here call self%kyb(ik)%get_field_update(fieldeq(:,:,ik),fieldeqa(:,:,ik),fieldeqp(:,:,ik)) enddo end subroutine fm_get_field_update !> A routine to unpack the supercell tmp_sum vectors to !! full field arrays subroutine fm_unpack_to_field(self,ph,ap,bp) use theta_grid, only: ntgrid use run_parameters, only: has_phi, has_apar, has_bpar use mp, only: proc0 implicit none class(fieldmat_type), intent(inout) :: self complex,dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp integer :: ik,is,ic,iex,ig,it,bnd,ifl !We don't need to unpack if we're proc0 as we've done this already if(.not.proc0)then do ik=1,self%naky if(.not.self%kyb(ik)%is_local) cycle do is=1,self%kyb(ik)%nsupercell if(.not.self%kyb(ik)%supercells(is)%is_local) cycle !Finalise the receives ! call self%kyb(ik)%supercells(is)%gfu_finish_recv iex=0 ifl=0 if(has_phi)then ifl=ifl+1 do ic=1,self%kyb(ik)%supercells(is)%ncell it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind bnd=0 if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1 do ig=-ntgrid,ntgrid-bnd iex=iex+1 ph(ig,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex) enddo enddo !/Fix boundary points if(self%kyb(ik)%supercells(is)%ncell.gt.1) then do ic=1,self%kyb(ik)%supercells(is)%ncell-1 ph(ntgrid,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=& ph(-ntgrid,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik) enddo endif endif if(has_apar)then ifl=ifl+1 do ic=1,self%kyb(ik)%supercells(is)%ncell it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind bnd=0 if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1 do ig=-ntgrid,ntgrid-bnd iex=iex+1 ap(ig,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex) enddo enddo !/Fix boundary points if(self%kyb(ik)%supercells(is)%ncell.gt.1) then do ic=1,self%kyb(ik)%supercells(is)%ncell-1 ap(ntgrid,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=& 
ap(-ntgrid,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik) enddo endif endif if(has_bpar)then ifl=ifl+1 do ic=1,self%kyb(ik)%supercells(is)%ncell it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind bnd=0 if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1 do ig=-ntgrid,ntgrid-bnd iex=iex+1 bp(ig,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex) enddo enddo !/Fix boundary points if(self%kyb(ik)%supercells(is)%ncell.gt.1) then do ic=1,self%kyb(ik)%supercells(is)%ncell-1 bp(ntgrid,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=& bp(-ntgrid,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik) enddo endif endif enddo enddo endif end subroutine fm_unpack_to_field !> A routine to reset the object subroutine fm_reset(self) use mp, only: free_comm implicit none class(fieldmat_type), intent(inout) :: self integer :: ik !Call reset on all children do ik=1,self%naky call self%kyb(ik)%reset enddo !deallocate call self%deallocate !AJ Free the communicators associated with this field matrix object. if(self%fm_sub_headsc_p0%nproc.gt.0) then call free_comm(self%fm_sub_headsc_p0) end if !Could zero out variables but no real need end subroutine fm_reset !> Count how many subcommunicators will be created subroutine fm_count_subcom(self) use mp, only: iproc, barrier, nproc implicit none class(fieldmat_type), intent(inout) :: self integer :: ik, is, ip integer :: nsub_tot, nsub_proc !Initialise nsub_tot=2 !Subcom belonging to fieldmat object nsub_proc=nsub_tot !Now get children do ik=1,self%naky nsub_tot=nsub_tot+2 if(self%kyb(ik)%is_local) nsub_proc=nsub_proc+2 do is=1,self%kyb(ik)%nsupercell nsub_tot=nsub_tot+2 if(self%kyb(ik)%supercells(is)%is_local) nsub_proc=nsub_proc+2 enddo enddo !Now report if(iproc.eq.0) write(dlun,'("Total number of subcommunicators is ",I0)') nsub_tot do ip=0,nproc-1 if(ip.eq.iproc) write(dlun,'("Number of subcommunicators on ",I0," is ",I0)') iproc, nsub_proc call barrier enddo if(iproc.eq.0)write(dlun,'()') end subroutine fm_count_subcom !> Create all the 
necessary subcommunicators subroutine fm_make_subcom_1(self) use mp, only: iproc, nproc, mp_comm, comm_type, split, free_comm, proc0 implicit none class(fieldmat_type), intent(inout) :: self integer :: ik !Set the all comm to be mp_comm self%fm_sub_all%id=mp_comm self%fm_sub_all%iproc=iproc self%fm_sub_all%nproc=nproc self%fm_sub_all%proc0=proc0 !Set the parents and make their subcommunicators do ik=1,self%naky !Set parent subcom self%kyb(ik)%parent_sub=self%fm_sub_all !Make individual subcom call self%kyb(ik)%make_subcom_1 enddo end subroutine fm_make_subcom_1 !> Create the secondary subcommunicators subroutine fm_make_subcom_2(self) use mp, only: split, free_comm implicit none class(fieldmat_type), intent(inout) :: self integer :: ik, colour type(comm_type) :: tmp logical :: has_head_sc !Make child subcommunicators do ik=1,self%naky !Make individual subcom call self%kyb(ik)%make_subcom_2 enddo !Now make the supercell head communicator used for field gathers !/First work out if this proc is the head of any supercells has_head_sc=.false. do ik=1,self%naky has_head_sc=has_head_sc.or.any(self%kyb(ik)%supercells%is_head) enddo !/Now split to make a subcom colour=0 if(has_head_sc.or.self%fm_sub_all%proc0) colour=1 call split(colour,tmp,self%fm_sub_all%id) if(has_head_sc.or.self%fm_sub_all%proc0) then !/Store subcom self%fm_sub_headsc_p0=tmp else self%fm_sub_headsc_p0%nproc=0 !/Destroy subcom call free_comm(tmp) endif end subroutine fm_make_subcom_2 !> Record the process that is the supercell head associated with a given ik,it point !! Supercells have a single ik but may have many it points so a given supercell head !! may be responsible for a range of points subroutine fm_calc_sc_heads(self) use mp, only: iproc, sum_allreduce implicit none class(fieldmat_type), intent(inout) :: self integer :: ik, is, ic, it !AJ There may be a problem here is any supercell has no assigned head. 
    !AJ This is the case (I think) for 1,1; but it shouldn't actually cause
    !AJ a problem as 1,1 shouldn't be communicated anyway as only one process
    !AJ is involved in it.
    self%heads = 0
    !Each head writes its own rank against every it point of the supercells it heads
    do ik=1,self%naky
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_head) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it = self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                self%heads(it,ik) = iproc
             end do
          end if
       end do
    end do
    !Exactly one head contributes to each (it,ik), so the global sum yields the
    !head rank everywhere (and 0 where no head was assigned, per the note above)
    call sum_allreduce(self%heads)
  end subroutine fm_calc_sc_heads

  !> Redistribute the fields from g_lo to gf_lo
  !! This is used to take the initial fields that are calculated
  !! in initialisation using the fields_implicit functionality,
  !! and therefore in g_lo, and distribute them to gf_lo layout as
  !! well so that we can start the advance_gf_local routine with the
  !! correct field data in both formats.
  !!
  !! Two point-to-point phases using nonblocking sends/receives:
  !!   1) the designated g_lo owner of each (it,ik) sends its field slab to proc0;
  !!   2) the same owner sends the slab to the gf_lo owner of that point.
  !! In both phases self-sends are short-circuited with direct array copies.
  subroutine fm_scatter_fields(self,gf_ph,gf_ap,gf_bp,ph,ap,bp)
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0, kwork_filter
    use mp, only: nbsend, nbrecv, waitall, broadcast_sub
    use mp, only: rank_comm, comm_type, proc0, iproc, initialise_requests
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: gf_lo, g_lo, proc_id, idx, ik_idx, it_idx
    implicit none
    class(fieldmat_type), intent(inout) :: self
    complex, dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp,gf_ph,gf_ap,gf_bp
    integer :: igf, ik, it, sendreqnum, recvreqnum, sender, receiver, tag
    integer, dimension (:), allocatable :: recv_handles, send_handles
    integer :: tempfield
    !Staging buffer: (theta, field index, it, ik); active fields are packed
    !consecutively in the second index in the order phi, apar, bpar
    complex, dimension(:,:,:,:), allocatable :: tempdata
    allocate(tempdata(-ntgrid:ntgrid,nfield,gf_lo%ntheta0,gf_lo%naky))
    !AJ This section is for sending data from one of the g_lo owner to the
    !AJ process that owns this it,ik point in gf_lo.
    recvreqnum = 1
    allocate(recv_handles(naky*ntheta0))
    allocate(send_handles(naky*ntheta0))
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    if(proc0) then
       do ik = 1,self%naky
          do it = 1,self%ntheta0
             if(kwork_filter(it,ik)) cycle
             !AJ Calculate which of the processes that own this it,ik point in g_lo space will send it to proc0
             !AJ This can be optimised by ensuring we choose proc0 to send this if proc0 is one of the owners of this
             !AJ in g_lo (rather than just choosing the last process as we do here and elsewhere for load balance).
             sender = g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs)
             if(sender .ne. iproc) then
                tag = ik*self%ntheta0 + it
                call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
                recvreqnum = recvreqnum + 1
             end if
          end do
       end do
    end if
    sendreqnum = 1
    !AJ Iterate through all the points I own in gf_lo and send that part of the field
    !AJ arrays to proc0
    receiver = 0
    do ik = 1,g_lo%naky
       do it = 1,g_lo%ntheta0
          if(kwork_filter(it,ik)) cycle
          !AJ This can be optimised by ensuring we choose proc0 to send this if proc0 is one of the owners of this
          !AJ in g_lo (rather than just choosing the last process as we do here and elsewhere for load balance).
          if(g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs) .eq. iproc) then
             if(iproc .ne. receiver) then
                tag = ik*self%ntheta0 + it
                tempfield = 1
                if(has_phi) then
                   tempdata(:,tempfield,it,ik) = ph(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   tempdata(:,tempfield,it,ik) = ap(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   tempdata(:,tempfield,it,ik) = bp(:,it,ik)
                end if
                call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                sendreqnum = sendreqnum + 1
             end if
          end if
       end do
    end do
    call waitall(recvreqnum-1,recv_handles)
    !AJ Now copy the received data out of the temporary array to the data arrays on proc0
    if(proc0) then
       do ik = 1,self%naky
          do it = 1,self%ntheta0
             if(kwork_filter(it,ik)) cycle
             !AJ Calculate which of the processes that own this it,ik point in g_lo space will send it to proc0
             !AJ This can be optimised by ensuring we choose proc0 to send this if proc0 is one of the owners of this
             !AJ in g_lo (rather than just choosing the last process as we do here and elsewhere as a very
             !AJ basic attempt at load balancing).
             sender = g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs)
             if(sender .ne. iproc) then
                tempfield = 1
                if(has_phi) then
                   gf_ph(:,it,ik) = tempdata(:,tempfield,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   gf_ap(:,it,ik) = tempdata(:,tempfield,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   gf_bp(:,it,ik) = tempdata(:,tempfield,it,ik)
                end if
             else
                !proc0 was itself the g_lo owner: no message, copy locally
                if(has_phi) then
                   gf_ph(:,it,ik) = ph(:,it,ik)
                end if
                if(has_apar) then
                   gf_ap(:,it,ik) = ap(:,it,ik)
                end if
                if(has_bpar) then
                   gf_bp(:,it,ik) = bp(:,it,ik)
                end if
             end if
          end do
       end do
    end if
    call waitall(sendreqnum-1,send_handles)
    !AJ Send the data from one of the owners in g_lo to the owner in gf_lo.
    recvreqnum = 1
    do igf = gf_lo%llim_proc, gf_lo%ulim_proc
       ik = ik_idx(gf_lo,igf)
       it = it_idx(gf_lo,igf)
       if(kwork_filter(it,ik)) cycle
       tag = ik*self%ntheta0 + it
       !AJ Calculate which process of the processes that owns this point in g_lo space will send this data to
       !AJ the owner in gf_lo
       sender = g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs)
       if(sender .ne. iproc) then
          call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
          recvreqnum = recvreqnum + 1
       end if
    end do
    sendreqnum = 1
    do ik = 1,g_lo%naky
       do it = 1,g_lo%ntheta0
          !AJ Choose the sender as the LAST process listed in the process list for a given ik,it point in g_lo
          !AJ We chose the last process here to try and ensure that proc0 isn't overloaded. Any of the processes
          !AJ in the proc_list could have been chosen
          if(g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs) .eq. iproc) then
             if(kwork_filter(it,ik)) cycle
             tag = ik*self%ntheta0 + it
             receiver = proc_id(gf_lo,idx(gf_lo,ik,it))
             if(iproc .ne. receiver) then
                tempfield = 1
                if(has_phi) then
                   tempdata(:,tempfield,it,ik) = ph(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   tempdata(:,tempfield,it,ik) = ap(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   tempdata(:,tempfield,it,ik) = bp(:,it,ik)
                end if
                call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                sendreqnum = sendreqnum + 1
             end if
          end if
       end do
    end do
    call waitall(recvreqnum-1,recv_handles)
    do igf = gf_lo%llim_proc, gf_lo%ulim_proc
       ik = ik_idx(gf_lo,igf)
       it = it_idx(gf_lo,igf)
       if(kwork_filter(it,ik)) cycle
       sender = g_lo%ikit_procs_assignment(it,ik)%proc_list(g_lo%ikit_procs_assignment(it,ik)%num_procs)
       if(sender .ne. iproc) then
          tempfield = 1
          if(has_phi) then
             gf_ph(:,it,ik) = tempdata(:,tempfield,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             gf_ap(:,it,ik) = tempdata(:,tempfield,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             gf_bp(:,it,ik) = tempdata(:,tempfield,it,ik)
          end if
       else
          !I am both g_lo sender and gf_lo owner: plain local copy
          if(has_phi) then
             gf_ph(:,it,ik) = ph(:,it,ik)
          end if
          if(has_apar) then
             gf_ap(:,it,ik) = ap(:,it,ik)
          end if
          if(has_bpar) then
             gf_bp(:,it,ik) = bp(:,it,ik)
          end if
       end if
    end do
    call waitall(sendreqnum-1,send_handles)
    deallocate(tempdata)
    deallocate(send_handles, recv_handles)
  end subroutine fm_scatter_fields

  !> Redistribute initial fields from gf_lo to g_lo. These are fields that have not been reduced to the
  !! supercell heads, just calculated on each process that owns gf_lo points
  subroutine fm_gather_init_fields(self,gf_phnew,gf_apnew,gf_bpnew,gf_ph,gf_ap,gf_bp,phnew,apnew,bpnew,ph,ap,bp)
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0, kwork_filter
    use mp, only: nbsend, nbrecv, waitall, broadcast_sub, sum_reduce_sub, proc0
    use mp, only: rank_comm, comm_type, initialise_requests, iproc
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: gf_lo, g_lo, proc_id, idx, ik_idx, it_idx
    implicit none
    class(fieldmat_type), intent(inout) :: self
    !AJ We have two sets of field arrays; ph,ap,bp store the data we hold in g_lo (i.e.
    !AJ the field points we have in the g_lo decomposition), and gf_ph,gf_ap,gf_bp hold
    !AJ the field points we have in the gf_lo decomposition. This was done for safety (to
    !AJ ensure that updating fields in gf_lo or g_lo did not interfere with each other), however
    !AJ I THINK it should be possible to do this in a single array.
    complex, dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp,gf_ph,gf_ap,gf_bp
    complex, dimension(-ntgrid:,:,:), intent(inout) :: phnew,apnew,bpnew,gf_phnew,gf_apnew,gf_bpnew
    integer :: is, ic
    integer :: ik, it, ikit, sendreqnum, recvreqnum, sender, receiver, tag
    integer, dimension (:), allocatable :: recv_handles, send_handles
    integer :: tempfield, local_iproc
    !Staging buffer: (theta, field index, it, ik); active fields packed in
    !order phi, apar, bpar in the second index
    complex, dimension(:,:,:,:), allocatable :: tempdata
    allocate(tempdata(-ntgrid:ntgrid,nfield,ntheta0,naky))
    !AJ At this point each process has the correct initialised fields in gf_lo (i.e. the points it owns in gf_lo).
    !AJ what we want to do in this routine is distribute them so that all those processes who are part of the supercell
    !AJ in gf_lo have the data, and all the processes that own the points in g_lo also have the data.
    allocate(recv_handles(naky*ntheta0))
    allocate(send_handles(naky*ntheta0))
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    !AJ The first thing we do is send the data from the owner in gf_lo to the supercell head in gf_lo. This
    !AJ will enable us to distribute it to the whole supercell in gf_lo, and then after that to the processes
    !AJ that need it in g_lo
    tempdata = 0.0
    recvreqnum = 1
    sendreqnum = 1
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          receiver = self%kyb(ik)%supercells(is)%head_iproc_world
          do ic=1,self%kyb(ik)%supercells(is)%ncell
             it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
             sender = proc_id(gf_lo,idx(gf_lo,ik,it))
             tag = ik*self%ntheta0 + it
             if(sender .eq. iproc .and. receiver .ne. iproc) then
                tempfield = 1
                if(has_phi) then
                   tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures
                   !AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_ph(:,it,ik) = phnew(:,it,ik)
                   gf_phnew(:,it,ik) = phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures
                   !AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_ap(:,it,ik) = apnew(:,it,ik)
                   gf_apnew(:,it,ik) = apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures
                   !AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_bp(:,it,ik) = bpnew(:,it,ik)
                   gf_bpnew(:,it,ik) = bpnew(:,it,ik)
                end if
                call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                sendreqnum = sendreqnum + 1
             else if(receiver .eq. iproc .and. sender .ne. iproc) then
                call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
                recvreqnum = recvreqnum + 1
             !AJ Both the sender and receiver are the same process, so this process
             !AJ already has the correct gf_lo field data, but it is currently stored
             !AJ in the g_lo arrays (phi,apar,bpar) instead of the gf_lo arrays
             !AJ (gf_phi, gf_apar, gf_bpar). Copy it across so it is in the correct array.
             else if(receiver .eq. iproc .and. sender .eq. iproc) then
                tempfield = 1
                if(has_phi) then
                   gf_phnew(:,it,ik) = phnew(:,it,ik)
                   gf_ph(:,it,ik) = phnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next
                   !AJ communication (the sending from supercell head to the first proc in the
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   gf_apnew(:,it,ik) = apnew(:,it,ik)
                   gf_ap(:,it,ik) = apnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next
                   !AJ communication (the sending from supercell head to the first proc in the
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   gf_bpnew(:,it,ik) = bpnew(:,it,ik)
                   gf_bp(:,it,ik) = bpnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next
                   !AJ communication (the sending from supercell head to the first proc in the
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                end if
             end if
          end do
       end do
    end do
    call waitall(recvreqnum-1,recv_handles)
    !Supercell heads unpack what the gf_lo owners sent them
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          if(.not.self%kyb(ik)%supercells(is)%is_head) cycle
          do ic=1,self%kyb(ik)%supercells(is)%ncell
             it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
             sender = proc_id(gf_lo,idx(gf_lo,ik,it))
             if(sender .ne. iproc) then
                tempfield = 1
                if(has_phi) then
                   gf_phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_ph(:,it,ik) = gf_phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   gf_apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_ap(:,it,ik) = gf_apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   gf_bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_bp(:,it,ik) = gf_bpnew(:,it,ik)
                end if
             end if
          end do
       end do
    end do
    call waitall(sendreqnum-1,send_handles)
    !AJ Now we collect all the data from the supercell heads to proc0 for diagnostics. If
    !AJ we implemented distributed diagnostics then this would not be necessary.
    if(self%fm_sub_headsc_p0%nproc.gt.0)then
       call sum_reduce_sub(tempdata,0,self%fm_sub_headsc_p0)
       !AJ proc0 unpacks the received field data and updates it's fields from it.
       !AJ Because proc0 now has all the field data it can update both the gf_lo
       !AJ fields and the g_lo fields.
       if(proc0) then
          tempfield = 1
          if(has_phi) then
             ph = tempdata(:,tempfield,:,:)
             phnew = ph
             gf_ph = ph
             gf_phnew = ph
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             ap = tempdata(:,tempfield,:,:)
             apnew = ap
             gf_ap = ap
             gf_apnew = ap
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             bp = tempdata(:,tempfield,:,:)
             bpnew = bp
             gf_bp = bp
             gf_bpnew = bp
          end if
       end if
    end if
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    !AJ Now send the data from the supercell head in gf_lo to the first process in the list of owners for
    !AJ the same point in g_lo
    recvreqnum = 1
    do ikit=1, g_lo%ikitrange
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       if(sender .ne. iproc .and. .not. proc0) then
          if(g_lo%ikit_procs_assignment(it,ik)%proc_list(1) .eq. iproc) then
             tag = ik*self%ntheta0 + it
             call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
             recvreqnum = recvreqnum + 1
          end if
       end if
    end do
    sendreqnum = 1
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          if(self%kyb(ik)%supercells(is)%is_head) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                if(kwork_filter(it,ik)) cycle
                receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
                !receiver 0 is skipped: proc0 already holds the full fields
                if(receiver .ne. iproc .and. receiver .ne. 0) then
                   tag = ik*self%ntheta0 + it
                   call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                   sendreqnum = sendreqnum + 1
                !AJ If I'm to be sending to myself then just copy the data from gf_lo arrays (where
                !AJ is was put in the communication to get it to supercell heads) to the correct g_lo arrays.
                else if(receiver .eq. iproc .and. .not. proc0) then
                   if(has_phi) then
                      phnew(:,it,ik) = gf_ph(:,it,ik)
                      ph(:,it,ik) = phnew(:,it,ik)
                   end if
                   if(has_apar) then
                      apnew(:,it,ik) = gf_ap(:,it,ik)
                      ap(:,it,ik) = apnew(:,it,ik)
                   end if
                   if(has_bpar) then
                      bpnew(:,it,ik) = gf_bp(:,it,ik)
                      bp(:,it,ik) = bpnew(:,it,ik)
                   end if
                end if
             end do
          end if
       end do
    end do
    call waitall(recvreqnum-1,recv_handles)
    !First g_lo owners unpack what the supercell heads sent them
    do ikit=1, g_lo%ikitrange
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
       if(sender .ne. iproc .and. receiver .eq. iproc .and. .not. proc0) then
          tempfield = 1
          if(has_phi) then
             phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             ph(:,it,ik) = phnew(:,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             ap(:,it,ik) = apnew(:,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             bp(:,it,ik) = bpnew(:,it,ik)
          end if
       end if
    end do
    call waitall(sendreqnum-1,send_handles)
    deallocate(send_handles, recv_handles)
    !AJ Now get the data from the process in g_lo that has received a given ik,it point to the rest of the
    !AJ processes that own it.
    !AJ This could be optimised if the xyblock_comm is available, but as a first start the code below works
    !AJ and doesn't involve global collectives.
    do ik = 1,self%naky
       do it = 1,self%ntheta0
          if(kwork_filter(it,ik)) cycle
          if(g_lo%ikit_procs_assignment(it,ik)%mine) then
             if(g_lo%ikit_procs_assignment(it,ik)%num_procs .gt. 1) then
                sender = g_lo%ikit_procs_assignment(it,ik)%sub_proc_list(1)
                local_iproc = g_lo%ikit_procs_assignment(it,ik)%comm%iproc
                !AJ This could be avoided on some proceses as tempdata will already have the correct data
                !AJ (providing sender .ne. receiver in the above calls)
                if(sender .eq. local_iproc .and. .not. proc0) then
                   tempfield = 1
                   if(has_phi) then
                      tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                   end if
                end if
                call broadcast_sub(tempdata(:,:,it,ik),sender,g_lo%ikit_procs_assignment(it,ik)%comm%id)
                if(sender .ne. local_iproc .and. .not. proc0) then
                   tempfield = 1
                   if(has_phi) then
                      phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      ph(:,it,ik) = phnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      ap(:,it,ik) = apnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      bp(:,it,ik) = bpnew(:,it,ik)
                   end if
                end if
             end if
          end if
       end do
    end do
    deallocate(tempdata)
  end subroutine fm_gather_init_fields

  !> Redistribute fields from gf_lo to g_lo. This first involves unpacking the calculated
  !! fields from tmpsum on the supercell heads to the field arrays and then sending them back
  !! to the processes that calculated them in gf_lo and the processes that need them in g_lo.
  subroutine fm_gather_fields(self,gf_ph,gf_ap,gf_bp,ph,ap,bp)
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0, kwork_filter
    use mp, only: nbsend, nbrecv, waitall, broadcast_sub, sum_reduce_sub
    use mp, only: rank_comm, comm_type, proc0, iproc, initialise_requests
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: gf_lo, g_lo, proc_id, idx, ik_idx, it_idx
    implicit none
    class(fieldmat_type), intent(inout) :: self
    !AJ We have two sets of field arrays; ph,ap,bp store the data we hold in g_lo (i.e.
    !AJ the field points we have in the g_lo decomposition), and gf_ph,gf_ap,gf_bp hold
    !AJ the field points we have in the gf_lo decomposition.
    !AJ This was done for safety (to
    !AJ ensure that updating fields in gf_lo or g_lo did not interfere with each other), however
    !AJ I THINK it should be possible to do this in a single array.
    complex, dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp,gf_ph,gf_ap,gf_bp
    integer :: is, ic, iex, ig, bnd
    integer :: ik, it, ikit, sendreqnum, recvreqnum, sender, receiver, tag
    integer, dimension (:), allocatable :: recv_handles, send_handles
    integer :: tempfield, head, sub, local_iproc
    complex, dimension(:,:,:,:), allocatable :: tempdata,tempdata3
    complex, dimension(:,:,:), allocatable :: tempdata2
    !NOTE(review): initialisation at declaration gives this local an implicit
    !save attribute; harmless here since it is never modified, but be aware.
    logical :: use_broadcast = .false.
    allocate(tempdata(-ntgrid:ntgrid,nfield,ntheta0,naky))
    allocate(tempdata3(-ntgrid:ntgrid,nfield,ntheta0,naky))
    tempdata = 0.0
    !CMRQ: cost of Supercell Head Packing?
    !CMR: BEGIN Supercell Head Packing
    !AJ Unpack tmp_sum on the supercell head, tmp_sum has the calculated field update on the supercell head
    do ik=1,self%naky
       if(.not.self%kyb(ik)%is_local) cycle
       do is=1,self%kyb(ik)%nsupercell
          !Skip if we're not the head supercell
          if(.not.self%kyb(ik)%supercells(is)%is_head) cycle
          !iex is a running offset into the flattened tmp_sum vector; the
          !active fields are stored back-to-back in the order phi, apar, bpar
          iex=0
          tempfield = 1
          if(has_phi) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                bnd=0
                if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1
                do ig=-ntgrid,ntgrid-bnd
                   iex=iex+1
                   tempdata(ig,tempfield,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex)
                enddo
             enddo
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                bnd=0
                if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1
                do ig=-ntgrid,ntgrid-bnd
                   iex=iex+1
                   tempdata(ig,tempfield,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex)
                enddo
             enddo
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                bnd=0
                if(ic.ne.self%kyb(ik)%supercells(is)%ncell) bnd=1
                do ig=-ntgrid,ntgrid-bnd
                   iex=iex+1
                   tempdata(ig,tempfield,it,ik)=self%kyb(ik)%supercells(is)%tmp_sum(iex)
                enddo
             enddo
          end if
          tempfield = 1
          !<DD>TAGGED:Worth looking at improving this bad memory pattern
          !/Now fix boundary points
          if(self%kyb(ik)%supercells(is)%ncell.gt.1) then
             if(has_phi) then
                do ic=1,self%kyb(ik)%supercells(is)%ncell-1
                   tempdata(ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=&
                        tempdata(-ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik)
                enddo
                tempfield = tempfield + 1
             end if
             if(has_apar) then
                do ic=1,self%kyb(ik)%supercells(is)%ncell-1
                   tempdata(ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=&
                        tempdata(-ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik)
                enddo
                tempfield = tempfield + 1
             end if
             if(has_bpar) then
                do ic=1,self%kyb(ik)%supercells(is)%ncell-1
                   tempdata(ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic)%it_ind,ik)=&
                        tempdata(-ntgrid,tempfield,self%kyb(ik)%supercells(is)%cells(ic+1)%it_ind,ik)
                enddo
             end if
          endif
       end do
    enddo
    !AJ At this point tempdata has the calculated field update for a supercell on the supercell head.
    !CMR: END Supercell Head Packing
    !CMRQ: cost of Supercell Head => proc0?
    !CMR: BEGIN All Supercell Heads => proc0
    allocate(recv_handles(naky*ntheta0))
    allocate(send_handles(naky*ntheta0))
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    recvreqnum = 1
    !AJ Now we collect all the data from the supercell heads to proc0 for diagnostics. If
    !AJ we implemented distributed diagnostics then this would not be necessary.
    if(self%fm_sub_headsc_p0%nproc.gt.0)then
       call sum_reduce_sub(tempdata,0,self%fm_sub_headsc_p0)
       !AJ proc0 unpacks the received field update data and updates it's fields from it.
       !AJ Because proc0 now has all the field update data it can update both the gf_lo
       !AJ fields and the g_lo fields.
       if(proc0) then
          tempfield = 1
          if(has_phi) then
             ph = tempdata(:,tempfield,:,:)
             gf_ph = ph
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             ap = tempdata(:,tempfield,:,:)
             gf_ap = ap
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             bp = tempdata(:,tempfield,:,:)
             gf_bp = bp
          end if
       end if
    end if
    !CMR: END All Supercell Heads => proc0
    !AJ Now send the data back to all processes in the supercell
    !AJ This is less efficient than it needs to be.
    !AJ We could also make use of the fact that in gf_lo a cell is only
    !AJ ever owned by a single owner so we could just send specific
    !AJ it,ik points to the processes that own them with point-to-point
    !AJ communications (see the else part of this)
    !CMR: BEGIN Supercell Heads => cells
    !
    !CMR: Note AJ Comment above that USE_BROADCAST=T approach (default) is inefficient!
    !
    !CMR: Observation, probably need MPI rank to participate in SINGLE supercell
    !     MPI rank holding multiple cells might be OK (and avoid comms contention) sometimes:
    !     -- if all cells in SAME supercell
    !     -- if cells in different supercells that need no COMMS (e.g. supercells with 1 cell)
    if(use_broadcast) then
       do ik=1,self%naky
          !CMR: Loop over each supercell (labelled ik, self%kyb(ik)%nsupercell),
          if(self%kyb(ik)%is_empty) cycle
          do is=1,self%kyb(ik)%nsupercell
             if(self%kyb(ik)%supercells(is)%is_empty) cycle
             !CMR: if get here, iproc participates
             head = self%kyb(ik)%supercells(is)%head_iproc
             sub = self%kyb(ik)%supercells(is)%sc_sub_all%id
             allocate(tempdata2(-ntgrid:ntgrid,nfield,self%kyb(ik)%supercells(is)%ncell))
             !CMR: head does all packing ready for broadcast to all supercell
             if(self%kyb(ik)%supercells(is)%is_head) then
                do ic=1,self%kyb(ik)%supercells(is)%ncell
                   it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                   tempfield = 1
                   if(has_phi) then
                      tempdata2(:,tempfield,ic) = tempdata(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      tempdata2(:,tempfield,ic) = tempdata(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      tempdata2(:,tempfield,ic) = tempdata(:,tempfield,it,ik)
                   end if
                end do
             end if
             call broadcast_sub(tempdata2,head,sub)
             if(.not.self%kyb(ik)%supercells(is)%is_head) then
                !CMR: everyone else unpacks result of broadcast
                do ic=1,self%kyb(ik)%supercells(is)%ncell
                   it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                   tempfield = 1
                   if(has_phi) then
                      gf_ph(:,it,ik) = tempdata2(:,tempfield,ic)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      gf_ap(:,it,ik) = tempdata2(:,tempfield,ic)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      gf_bp(:,it,ik) = tempdata2(:,tempfield,ic)
                   end if
                end do
             end if
             deallocate(tempdata2)
          end do
       end do
    !AJ Don't use the broadcast to do the sending back of data in the supercell,
    !AJ use point to point communications instead.
    else
       !CMR: NB this (more efficient?) approach is OFF by default!
       !CMRQ: Is alternative to USE_BROADCAST=F working? Is it more efficient? Why not default?
       call initialise_requests(send_handles)
       call initialise_requests(recv_handles)
       recvreqnum = 1
       sendreqnum = 1
       do ik=1,self%naky
          if(self%kyb(ik)%is_empty) cycle
          do is=1,self%kyb(ik)%nsupercell
             if(self%kyb(ik)%supercells(is)%is_empty) cycle
             sender = self%kyb(ik)%supercells(is)%head_iproc_world
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                receiver = proc_id(gf_lo,idx(gf_lo,ik,it))
                !AJ For each supercell that I am head of send the data to the process that owns it in gf_lo
                if(self%kyb(ik)%supercells(is)%is_head) then
                   !AJ If I am the supercell head and I'm not sending this to proc0 (as that already has all the data)
                   !AJ or to myself then send the data.
                   if(receiver .ne. iproc .and. receiver .ne. 0) then
                      tag = ik*self%ntheta0 + it
                      call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                      sendreqnum = sendreqnum + 1
                   !AJ Otherwise, if I'm sending to myself and I'm not proc0 then unpack the
                   !AJ the data directly from my buffer into the gf fields array.
                   else if(receiver .eq. iproc .and. receiver .ne. 0) then
                      tempfield = 1
                      if(has_phi) then
                         gf_ph(:,it,ik) = tempdata(:,tempfield,it,ik)
                         tempfield = tempfield + 1
                      end if
                      if(has_apar) then
                         gf_ap(:,it,ik) = tempdata(:,tempfield,it,ik)
                         tempfield = tempfield + 1
                      end if
                      if(has_bpar) then
                         gf_bp(:,it,ik) = tempdata(:,tempfield,it,ik)
                      end if
                   end if
                else if(iproc .eq. receiver .and. iproc .ne. 0) then
                   tag = ik*self%ntheta0 + it
                   call nbrecv(tempdata3(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
                   recvreqnum = recvreqnum + 1
                end if
             end do
          end do
       end do
       call waitall(recvreqnum-1,recv_handles)
       !Non-head gf_lo owners unpack what the supercell heads sent them
       do ik=1,self%naky
          if(self%kyb(ik)%is_empty) cycle
          do is=1,self%kyb(ik)%nsupercell
             if(self%kyb(ik)%supercells(is)%is_empty) cycle
             if(self%kyb(ik)%supercells(is)%is_head) cycle
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                receiver = proc_id(gf_lo,idx(gf_lo,ik,it))
                if(receiver .eq. iproc .and. receiver .ne. 0) then
                   tempfield = 1
                   if(has_phi) then
                      gf_ph(:,it,ik) = tempdata3(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      gf_ap(:,it,ik) = tempdata3(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      gf_bp(:,it,ik) = tempdata3(:,tempfield,it,ik)
                   end if
                end if
             end do
          end do
       end do
       call waitall(sendreqnum-1,send_handles)
    end if
    !CMR: END Supercell Heads => cells (gf_fields)
    !CMR: BEGIN Supercell Heads => FIRST g_lo rank(ik,it)
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    recvreqnum = 1
    do ikit=1, g_lo%ikitrange
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       if(sender .ne. iproc .and. iproc .ne. 0) then
          if(g_lo%ikit_procs_assignment(it,ik)%proc_list(1) .eq. iproc) then
             !CMR: if iproc is FIRST processor on list that must receive ik,it in g_lo, set up nbrecv
             tag = ik*self%ntheta0 + it
             call nbrecv(tempdata3(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
             recvreqnum = recvreqnum + 1
          end if
       end if
    end do
    sendreqnum = 1
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          if(self%kyb(ik)%supercells(is)%is_head) then
             !CMR: if iproc is supercell head for it,ik, proceed to set up nbsends
             do ic=1,self%kyb(ik)%supercells(is)%ncell
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                if(kwork_filter(it,ik)) cycle
                receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
                if(receiver .ne. iproc .and. receiver .ne. 0) then
                   tag = ik*self%ntheta0 + it
                   !CMR: nbsend from supercell head to where needed in g_lo
                   call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                   sendreqnum = sendreqnum + 1
                else if(receiver .eq. iproc .and. receiver .ne. 0) then
                   !CMR: unpack data supercell head sends to ITSELF in g_lo
                   tempfield = 1
                   if(has_phi) then
                      ph(:,it,ik) = tempdata(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_apar) then
                      ap(:,it,ik) = tempdata(:,tempfield,it,ik)
                      tempfield = tempfield + 1
                   end if
                   if(has_bpar) then
                      bp(:,it,ik) = tempdata(:,tempfield,it,ik)
                   end if
                end if
             end do
          end if
       end do
    end do
    call waitall(recvreqnum-1,recv_handles)
    do ikit=1, g_lo%ikitrange
       !CMR: all other cores unpack received data in g_lo
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
       if(sender .ne. iproc .and. receiver .eq. iproc .and. .not. proc0) then
          tempfield = 1
          if(has_phi) then
             ph(:,it,ik) = tempdata3(:,tempfield,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             ap(:,it,ik) = tempdata3(:,tempfield,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             bp(:,it,ik) = tempdata3(:,tempfield,it,ik)
          end if
       end if
    end do
    call waitall(sendreqnum-1,send_handles)
    deallocate(send_handles, recv_handles)
    deallocate(tempdata3)
    !CMR: END Supercell Heads => FIRST g_lo rank(ik,it)
    !CMR: BEGIN FIRST g_lo rank(ik,it) => OTHER g_lo ranks(ik,it)
    !AJ Now get the data from the process in g_lo that has received a given ik,it point to the rest of the
    !AJ processes that own it.
    !AJ This could be optimised if the xyblock_comm is available, but as a first start the code below works
    !AJ and doesn't involve global collectives.
    !NOTE(review): unlike fm_gather_init_fields, this loop has no
    !num_procs .gt. 1 guard before broadcast_sub — presumably safe for a
    !single-member subcommunicator, but worth confirming against mp.
    do ik = 1,self%naky
       do it = 1,self%ntheta0
          if(kwork_filter(it,ik)) cycle
          if(g_lo%ikit_procs_assignment(it,ik)%mine) then
             sender = g_lo%ikit_procs_assignment(it,ik)%sub_proc_list(1)
             local_iproc = g_lo%ikit_procs_assignment(it,ik)%comm%iproc
             if(sender .eq. local_iproc) then
                tempfield = 1
                if(has_phi) then
                   tempdata(:,tempfield,it,ik) = ph(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   tempdata(:,tempfield,it,ik) = ap(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   tempdata(:,tempfield,it,ik) = bp(:,it,ik)
                end if
             end if
             call broadcast_sub(tempdata(:,:,it,ik),sender,g_lo%ikit_procs_assignment(it,ik)%comm%id)
             if(sender .ne. local_iproc .and. .not. proc0) then
                tempfield = 1
                if(has_phi) then
                   ph(:,it,ik) = tempdata(:,tempfield,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   ap(:,it,ik) = tempdata(:,tempfield,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   bp(:,it,ik) = tempdata(:,tempfield,it,ik)
                end if
             end if
          end if
       end do
    end do
    deallocate(tempdata)
    !CMR: END FIRST g_lo rank(ik,it) => OTHER g_lo ranks(ik,it)
  end subroutine fm_gather_fields

  !> Update the fields using calculated update
  !! We update both the g_lo (phi,apar,bpar) and gf_lo (gf_phi,gf_apar,gf_bpar) fields
  !! as these are needed to make sure we can progress the fields in g_lo and gf_lo both
  !! times and thereby not have to transfer the full fields for all processes every iteration,
  !! only the required updates.
  !! With some refactoring it should be possible to do all the work on a single set of field
  !! arrays as they are the same size regardless of the layout being used.
subroutine fm_update_fields(phi, apar, bpar, gf_phi, gf_apar, gf_bpar)
  !Rename the module-level field increments so they do not clash with the
  !dummy arguments that are updated in place here.
  use fields_arrays, only: orig_phi=>phi, orig_apar=>apar, orig_bpar=>bpar
  use fields_arrays, only: orig_gf_phi=>gf_phi, orig_gf_apar=>gf_apar, orig_gf_bpar=>gf_bpar
  use run_parameters, only: has_phi, has_apar, has_bpar
  use kt_grids, only: kwork_filter
  use theta_grid, only: ntgrid
  use gs2_layouts, only: g_lo, gf_lo, ik_idx, it_idx
  use mp, only: proc0
  implicit none
  !> g_lo representation of the fields, incremented in place
  complex,dimension(-ntgrid:,:,:),intent(inout) :: phi,apar,bpar
  !> gf_lo representation of the fields, incremented in place
  complex,dimension(-ntgrid:,:,:),intent(inout) :: gf_phi,gf_apar,gf_bpar
  integer :: ik,it,ikit,igf
  !If we're proc0 then we need to do full array (for diagnostics)
  if(proc0) then
    if(has_phi) then
      phi=phi+orig_phi
      gf_phi=gf_phi+orig_gf_phi
    end if
    if(has_apar) then
      apar=apar+orig_apar
      gf_apar=gf_apar+orig_gf_apar
    end if
    if(has_bpar) then
      bpar=bpar+orig_bpar
      gf_bpar=gf_bpar+orig_gf_bpar
    end if
    return
  end if
  !AJ Iterate through each distinct ik,it point I have in g_lo and update the associated fields
  do ikit=1,g_lo%ikitrange
    ik = g_lo%local_ikit_points(ikit)%ik
    it = g_lo%local_ikit_points(ikit)%it
    if(kwork_filter(it,ik)) cycle
    if(has_phi) phi(:,it,ik)=phi(:,it,ik)+orig_phi(:,it,ik)
    if(has_apar) apar(:,it,ik)=apar(:,it,ik)+orig_apar(:,it,ik)
    if(has_bpar) bpar(:,it,ik)=bpar(:,it,ik)+orig_bpar(:,it,ik)
  end do
  !AJ Iterate through each gf_lo point I have and update the associated fields
  do igf=gf_lo%llim_proc,gf_lo%ulim_proc
    ik = ik_idx(gf_lo,igf)
    it = it_idx(gf_lo,igf)
    if(kwork_filter(it,ik)) cycle
    if(has_phi) gf_phi(:,it,ik)=gf_phi(:,it,ik)+orig_gf_phi(:,it,ik)
    if(has_apar) gf_apar(:,it,ik)=gf_apar(:,it,ik)+orig_gf_apar(:,it,ik)
    if(has_bpar) gf_bpar(:,it,ik)=gf_bpar(:,it,ik)+orig_gf_bpar(:,it,ik)
  end do
end subroutine fm_update_fields

!> Update the fields using the new fields, i.e. copy the "new" field arrays
!! (phinew etc.) into the current field arrays (phi etc.) in both the g_lo
!! and gf_lo representations, restricted to locally owned (ik,it) points
!! except on proc0 which copies the full arrays (for diagnostics).
subroutine fm_update_fields_newstep()
  use fields_arrays, only: phi,apar,bpar,phinew,aparnew,bparnew
  use fields_arrays, only: gf_phi,gf_apar,gf_bpar,gf_phinew,gf_aparnew,gf_bparnew
  use run_parameters, only: has_phi, has_apar, has_bpar
  use kt_grids, only: kwork_filter
  use gs2_layouts, only: g_lo, ik_idx, it_idx, gf_lo
  use mp, only: proc0
  use array_utils, only: copy
  implicit none
  integer :: ik,it,ikit,igf
  !If we're proc0 then we need to do full array (for diagnostics)
  if(proc0) then
    if(has_phi) then
      call copy(phinew, phi)
      call copy(gf_phinew, gf_phi)
    end if
    if(has_apar) then
      call copy(aparnew, apar)
      call copy(gf_aparnew, gf_apar)
    end if
    if(has_bpar) then
      call copy(bparnew, bpar)
      call copy(gf_bparnew, gf_bpar)
    end if
    return
  end if
  !AJ Iterate through each distinct ik,it point I have in g_lo and update the associated fields
  do ikit=1,g_lo%ikitrange
    ik = g_lo%local_ikit_points(ikit)%ik
    it = g_lo%local_ikit_points(ikit)%it
    if(kwork_filter(it,ik)) cycle
    if(has_phi) phi(:,it,ik)=phinew(:,it,ik)
    if(has_apar) apar(:,it,ik)=aparnew(:,it,ik)
    if(has_bpar) bpar(:,it,ik)=bparnew(:,it,ik)
  end do
  !AJ Iterate through each gf_lo point I have and update the associated fields
  do igf=gf_lo%llim_proc,gf_lo%ulim_proc
    ik = ik_idx(gf_lo,igf)
    it = it_idx(gf_lo,igf)
    if(kwork_filter(it,ik)) cycle
    if(has_phi) gf_phi(:,it,ik)=gf_phinew(:,it,ik)
    if(has_apar) gf_apar(:,it,ik)=gf_aparnew(:,it,ik)
    if(has_bpar) gf_bpar(:,it,ik)=gf_bparnew(:,it,ik)
  end do
end subroutine fm_update_fields_newstep

!> Write some debug data to file (one ".debug.<rank>.md" file per process):
!! object locality/state tables, row-block limits and subcommunicator info.
subroutine fm_write_debug_data(self)
  use file_utils, only: open_output_file, close_output_file
  use mp, only: iproc, nproc_comm, rank_comm
  implicit none
  class(fieldmat_type), intent(in) :: self
  character(len=20) :: ext
  integer :: unit
  integer :: ik,is,ic,np,ip
  !Make the file extension
  write(ext,'(".debug.",I0,".md")') iproc
  !Open output file
  call open_output_file(unit,ext)
  !Now write data
  write(unit,'("Field matrix debug data for proc : ",I0)') iproc
  write(unit,'(80("="))')
  write(unit,'()')
  !/Here we have details of what cells this proc can see and how
  !they're marked
  write(unit,'("Details of object locality and state")')
  write(unit,'(80("-"))')
  write(unit,'()')
  write(unit,'(7(" &
",A13," "))') "ik", "is", "ic", "is_local", "is_empty", "is_all_local", "is_head" write(unit,'(7(" ",14("-")))') do ik=1,self%naky do is=1,self%kyb(ik)%nsupercell do ic=1,self%kyb(ik)%supercells(is)%ncell write(unit,'(3(I14," "),4(L14," "))') ik,is,ic,& self%kyb(ik)%supercells(is)%cells(ic)%is_local,& self%kyb(ik)%supercells(is)%cells(ic)%is_empty,& self%kyb(ik)%supercells(is)%cells(ic)%is_all_local,& self%kyb(ik)%supercells(is)%is_head enddo enddo enddo write(unit,'(" ",7(14("-")," "))') write(unit,'()') !/Here we see what limits the row blocks have for the cells !we actually have write(unit,'()') write(unit,'("Row limits")') write(unit,'(80("-"))') write(unit,'()') do ik=1,self%naky write(unit,'("* ik : ",I0)') ik ! if(.not.self%kyb(ik)%is_local) cycle do is=1,self%kyb(ik)%nsupercell write(unit,'(" * is : ",I0)') is ! if(.not.self%kyb(ik)%supercells(is)%is_local) cycle write(unit,'(" ","is_head - ",L1)') self%kyb(ik)%supercells(is)%is_head write(unit,'(" ","is_empty - ",L1)') self%kyb(ik)%supercells(is)%is_empty write(unit,'(" ","is_local - ",L1)') self%kyb(ik)%supercells(is)%is_local write(unit,'(" ","is_all_local - ",L1)') self%kyb(ik)%supercells(is)%is_all_local write(unit,'()') write(unit,'(" ",4(" ",A8," "))') "ic", "rllim", "rulim", "nrow" write(unit,'(" ",4(" ",9("-")))') do ic=1,self%kyb(ik)%supercells(is)%ncell if(size(self%kyb(ik)%supercells(is)%cells(ic)%rb).gt.0)then write(unit,'(" ",4(I9," "))') ic,& self%kyb(ik)%supercells(is)%cells(ic)%rb(1)%row_llim,& self%kyb(ik)%supercells(is)%cells(ic)%rb(1)%row_ulim,& self%kyb(ik)%supercells(is)%cells(ic)%rb(1)%nrow endif enddo write(unit,'(" ",4(" ",9("-")))') enddo enddo write(unit,'()') !/Subcommunicators write(unit,'()') write(unit,'("Subcommunicators")') write(unit,'(80("-"))') write(unit,'()') do ik=1,self%naky write(unit,'("* ik : ",I0)') ik do is=1,self%kyb(ik)%nsupercell write(unit,'(" * is : ",I0)') is write(unit,'()') write(unit,'(" ",6(" ",A7," "))') "name", "nproc", "iproc", "proc0", "REnproc", 
"REiproc" write(unit,'(" ",6(" ",8("-")))') if(self%kyb(ik)%supercells(is)%sc_sub_all%nproc.gt.0)then call nproc_comm(self%kyb(ik)%supercells(is)%sc_sub_all%id,np) call rank_comm(self%kyb(ik)%supercells(is)%sc_sub_all%id,ip) else np=-1 ip=-1 endif write(unit,'(" ",(" ",A8),2(I8," "),1(L8," "),2(I8," "))') "all",& self%kyb(ik)%supercells(is)%sc_sub_all%nproc,& self%kyb(ik)%supercells(is)%sc_sub_all%iproc,& self%kyb(ik)%supercells(is)%sc_sub_all%proc0,& np,ip if(self%kyb(ik)%supercells(is)%sc_sub_pd%nproc.gt.0)then call nproc_comm(self%kyb(ik)%supercells(is)%sc_sub_pd%id,np) call rank_comm(self%kyb(ik)%supercells(is)%sc_sub_pd%id,ip) else np=-1 ip=-1 endif write(unit,'(" ",(" ",A8),2(I8," "),1(L8," "),2(I8," "))') "pd",& self%kyb(ik)%supercells(is)%sc_sub_pd%nproc,& self%kyb(ik)%supercells(is)%sc_sub_pd%iproc,& self%kyb(ik)%supercells(is)%sc_sub_pd%proc0,& np,ip if(self%kyb(ik)%supercells(is)%sc_sub_not_full%nproc.gt.0)then call nproc_comm(self%kyb(ik)%supercells(is)%sc_sub_not_full%id,np) call rank_comm(self%kyb(ik)%supercells(is)%sc_sub_not_full%id,ip) else np=-1 ip=-1 endif write(unit,'(" ",(" ",A8),2(I8," "),1(L8," "),2(I8," "))') "nfull",& self%kyb(ik)%supercells(is)%sc_sub_not_full%nproc,& self%kyb(ik)%supercells(is)%sc_sub_not_full%iproc,& self%kyb(ik)%supercells(is)%sc_sub_not_full%proc0,& np,ip write(unit,'(" ",6(" ",8("-")))') write(unit,'()') enddo enddo write(unit,'()') !Now close output file call close_output_file(unit) end subroutine fm_write_debug_data !> FIXME : Add documentation subroutine fm_getfieldeq_nogath (self,phi, apar, bpar, fieldeq, fieldeqa, fieldeqp) use theta_grid, only: ntgrid use dist_fn, only: getan_nogath use dist_fn_arrays, only: antot, antota, antotp implicit none class(fieldmat_type), intent(in) :: self complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar complex, dimension (-ntgrid:,:,:), intent (in out) ::fieldeq,fieldeqa,fieldeqp call getan_nogath(antot, antota, antotp) call self%getfieldeq1_nogath (phi, apar, bpar, 
antot, antota, antotp, & fieldeq, fieldeqa, fieldeqp) end subroutine fm_getfieldeq_nogath !> FIXME : Add documentation subroutine fm_getfieldeq1_nogath (self,phi, apar, bpar, antot, antota, antotp, & fieldeq, fieldeqa, fieldeqp) use gs2_layouts, only: ik_idx, it_idx use theta_grid, only: ntgrid, bmag use kt_grids, only: naky, ntheta0, kperp2, kwork_filter use run_parameters, only: has_phi, has_apar, has_bpar, beta use species, only: spec, has_electron_species use dist_fn, only: gamtot, gamtot1, gamtot2, fl_avg, apfac use dist_fn, only: adiabatic_option_switch, adiabatic_option_fieldlineavg, calculate_flux_surface_average implicit none class(fieldmat_type), intent(in) :: self complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar complex, dimension (-ntgrid:,:,:), intent (in) :: antot, antota, antotp complex, dimension (-ntgrid:,:,:), intent (in out) ::fieldeq,fieldeqa,fieldeqp integer :: ik, it, is, ic if (.not. allocated(fl_avg)) allocate (fl_avg(ntheta0, naky)) if (.not. has_electron_species(spec)) fl_avg = 0. if (.not. has_electron_species(spec)) then if (adiabatic_option_switch == adiabatic_option_fieldlineavg) then call calculate_flux_surface_average(fl_avg,antot) end if end if !AJ TODO: I thinkt his can be replaced by a loop over gf_lo points. !Now loop over cells and calculate field equation as required do ik=1,self%naky !Skip empty cells, note this is slightly different to skipping !.not.is_local. 
!Skipping empty is probably faster but may be more dangerous
     if(self%kyb(ik)%is_empty) cycle
     do is=1,self%kyb(ik)%nsupercell
        if(self%kyb(ik)%supercells(is)%is_empty) cycle
        do ic=1,self%kyb(ik)%supercells(is)%ncell
           if(self%kyb(ik)%supercells(is)%cells(ic)%is_empty) cycle
           it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
           if(kwork_filter(it,ik)) cycle
           !Now fill in data
           if(has_phi) then
              fieldeq(:,it,ik)=antot(:,it,ik)-gamtot(:,it,ik)*phi(:,it,ik)
              if(has_bpar) fieldeq(:,it,ik)=fieldeq(:,it,ik)+bpar(:,it,ik)*gamtot1(:,it,ik)
              !Adiabatic electron correction uses the flux surface average
              if(.not.has_electron_species(spec)) fieldeq(:,it,ik)= fieldeq(:,it,ik)+fl_avg(it,ik)
           endif
           if(has_apar) then
              fieldeqa(:,it,ik)=antota(:,it,ik)-kperp2(:,it,ik)*apar(:,it,ik)
           endif
           if(has_bpar)then
              fieldeqp(:,it,ik)=bpar(:,it,ik) + &
                   (antotp(:,it,ik)+bpar(:,it,ik)*gamtot2(:,it,ik) + &
                   0.5*phi(:,it,ik)*gamtot1(:,it,ik))*beta*apfac/bmag(:)**2
           endif
        enddo
     enddo
  enddo
end subroutine fm_getfieldeq1_nogath

!------------------------------------------------------------------------

!================
!PROC_DECOMP
!================

!> Allocate storage space for the per-(it,ik) decomposition arrays
subroutine pc_allocate(self)
  use kt_grids, only: naky, ntheta0
  implicit none
  class(pc_type), intent(inout) :: self
  allocate(self%is_local(ntheta0,naky))
  allocate(self%itik_subcom(ntheta0,naky))
  allocate(self%nproc_per_cell(ntheta0,naky))
  allocate(self%nresp_per_cell(ntheta0,naky))
  allocate(self%navail_per_cell(ntheta0,naky))
end subroutine pc_allocate

!> Deallocate storage space
subroutine pc_deallocate(self)
  implicit none
  class(pc_type), intent(inout) :: self
  if(allocated(self%is_local)) deallocate(self%is_local)
  if(allocated(self%itik_subcom)) deallocate(self%itik_subcom)
  if(allocated(self%nproc_per_cell)) deallocate(self%nproc_per_cell)
  if(allocated(self%nresp_per_cell)) deallocate(self%nresp_per_cell)
  if(allocated(self%navail_per_cell)) deallocate(self%navail_per_cell)
end subroutine pc_deallocate

!> Debug printing of the number of rows this process is responsible for
subroutine pc_debug_print(self)
  implicit none
  class(pc_type), intent(in) :: self
  write(dlun,'("Proc type debug print. Index nrow=",I0)') &
       self%current_nresp()
end subroutine pc_debug_print

!> Return the current number of rows we're responsible for
function pc_current_nresp(self)
  implicit none
  class(pc_type), intent(in) :: self
  integer :: pc_current_nresp
  pc_current_nresp=sum(self%nresp_per_cell)
end function pc_current_nresp

!> Work out which cells are local, i.e. which (it,ik) points this process
!! owns in gf_lo, and count how many processes own each cell globally.
subroutine pc_find_locality(self)
  use gs2_layouts, only: gf_lo, ik_idx, it_idx
  use mp, only: sum_allreduce
  implicit none
  class(pc_type),intent(inout) :: self
  integer :: it,ik,igf
  !Initialise
  self%is_local=0
  !Mark every (it,ik) pair that appears in our local gf_lo range
  do igf=gf_lo%llim_proc,gf_lo%ulim_proc
     ik=ik_idx(gf_lo,igf)
     it=it_idx(gf_lo,igf)
     self%is_local(it,ik)=1
  enddo
  !Now count how many procs have each cell
  self%nproc_per_cell=self%is_local
  call sum_allreduce(self%nproc_per_cell)
end subroutine pc_find_locality

!> Work out how many rows are available in each cell
subroutine pc_count_avail(self)
  use kt_grids, only: ntheta0, naky
  implicit none
  class(pc_type),intent(inout) :: self
  integer :: it,ik,is
  !Initialise
  self%navail_per_cell=0
  !Loop over cells
  do ik=1,naky
     do it=1,ntheta0
        !If cell isn't local then cycle
        if(self%is_local(it,ik).ne.1) cycle
        !Now find the supercell object with this it,ik
        do is=1,fieldmat%kyb(ik)%nsupercell
           !If this supercell has the it then store the
           !size of the supercell and exit loop
           if(fieldmat%kyb(ik)%supercells(is)%has_it(it)) then
              self%navail_per_cell(it,ik)=fieldmat%kyb(ik)%supercells(is)%nrow
              exit
           endif
        enddo
     enddo
  enddo
  !Set the total number of available rows
  self%navail_tot=sum(self%navail_per_cell)
end subroutine pc_count_avail

!> Determine if we have any cells with passed ik?
!!
!! @note This just says if we can see index
!! not that we have any data there
function pc_has_ik(self,ik)
  implicit none
  class(pc_type), intent(in) :: self
  integer, intent(in) :: ik
  logical :: pc_has_ik
  pc_has_ik=any(self%is_local(:,ik).eq.1)
end function pc_has_ik

!> Determine if we have any cells with passed it?
!!
!!
!! @note This just says if we can see index
!! not that we have any data there
function pc_has_it(self,it)
  implicit none
  class(pc_type), intent(in) :: self
  integer, intent(in) :: it
  logical :: pc_has_it
  pc_has_it=any(self%is_local(it,:).eq.1)
end function pc_has_it

!> A wrapper routine to do the decomposition
subroutine pc_make_decomp(self)
  !NOTE(review): mp_abort is imported but unused in this routine.
  use mp, only: mp_abort
  implicit none
  class(pc_type), intent(inout) :: self
  call self%decomp
end subroutine pc_make_decomp

!> A routine to reset the pc object
subroutine pc_reset(self)
  implicit none
  class(pc_type), intent(inout) :: self
  !deallocate
  call self%deallocate
  !Could zero out variables but no real need
end subroutine pc_reset

!> A routine to initialise the object
subroutine pc_init(self)
  implicit none
  class(pc_type), intent(inout) :: self
  !First allocate arrays
  call self%allocate
  !Now find cell locality
  call self%find_locality
  !Count avail data
  call self%count_avail
end subroutine pc_init

!---------------------------
!Decomposition routines
!---------------------------

!> The process decomposition works on the gf_lo layout, so if you have a
!! point in gf_lo that is in a supercell you are in that supercell and you
!! own the whole cell associated with that point, otherwise you don't have anything
!! in that supercell, i.e. each cell has a single owner.
subroutine pc_decomp(self)
  !NOTE(review): split, comm_type and free_comm are imported but appear
  !unused in this routine.
  use mp, only: split, comm_type, free_comm
  implicit none
  class(pc_type), intent(inout) :: self
  integer :: ik, is, ic, ifq, it, llim, ulim
  integer :: nrow_tmp
  !/Don't need to change pc%is_local as this is already correct
  !Make sure we do the invert locally as mpi based version not implemented!
  !NOTE(review): this sets the module-level pc object rather than self --
  !equivalent when invoked on pc itself; confirm if other instances exist.
  pc%force_local_invert=.true.
  !Now we (re)calculate how much data is available
  call self%count_avail
  !Initialise the number of rows responsible
  self%nresp_per_cell=0
  !Now loop over all cells, setting the row_llim of
  !their row blocks
  do ik=1,fieldmat%naky
     do is=1,fieldmat%kyb(ik)%nsupercell
        nrow_tmp=fieldmat%kyb(ik)%supercells(is)%nrow
        do ic=1,fieldmat%kyb(ik)%supercells(is)%ncell
           !Store it index
           it=fieldmat%kyb(ik)%supercells(is)%cells(ic)%it_ind
           !AJ cell locality is restricted to a single process in gf_lo, meaning for each cell there is only
           !AJ one process that will match this if statement.
           if(fieldmat%kyb(ik)%supercells(is)%cells(ic)%is_local)then
              !Now work out limits
              llim=1
              ulim=llim+nrow_tmp-1
              !Now loop over row blocks to set index
              do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
                 fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=llim
                 fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=ulim
                 call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
              enddo
           else
              !Non-local cells get zero-size row blocks
              do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
                 fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=0
                 fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=0
                 call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
              enddo
           endif
           !Record the number of responsible rows, note we assume all rb have same size
           if(size(fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb).gt.0)then
              self%nresp_per_cell(it,ik)=fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(1)%nrow
           else
              self%nresp_per_cell(it,ik)=0
           endif
        enddo
     enddo
  enddo
end subroutine pc_decomp

!------------------------------------------------------------------------

!//////////////////////////////
!// MODULE LEVEL ROUTINES
!//////////////////////////////

!> Routine to initialise the gf_lo based implicit field solve, initialising
!! the modules it depends upon (layouts, theta grid, kt grids, the field
!! matrix objects and the antenna).
subroutine init_fields_gf_local
  use antenna, only: init_antenna
  use theta_grid, only: init_theta_grid
  use kt_grids, only: init_kt_grids
  use gs2_layouts, only: init_gs2_layouts
  use mp, only: proc0, mp_abort, barrier
  use file_utils, only: error_unit
  implicit none
  !Early exit if
!Early exit if possible
  if (initialised) return
  !Exit if local fields not supported (e.g. compiled with pgi)
  if(.not.fields_gf_local_functional()) call mp_abort("field_option='gf local' not supported with this build")
  !Make sure any dependencies are already initialised and
  !initialise the field matrix object.
  if (debug.and.proc0) write(6,*) "init_fields_gf_local: gs2_layouts"
  call init_gs2_layouts
  if (debug.and.proc0) write(6,*) "init_fields_gf_local: theta_grid"
  call init_theta_grid
  if (debug.and.proc0) write(6,*) "init_fields_gf_local: kt_grids"
  call init_kt_grids
  if (debug.and.proc0) write(6,*) "init_fields_gf_local: init_fields_matrixlocal"
  call init_fields_matrixlocal
  if (debug.and.proc0) write(6,*) "init_fields_gf_local: antenna"
  call init_antenna
  !Set the initialised state
  initialised = .true.
  reinit = .false.
end subroutine init_fields_gf_local

!> Routine use to initialise field matrix data: builds the fieldmat and pc
!! objects, the subcommunicators and the decomposition (timing each stage
!! when debug output is enabled).
subroutine init_fields_matrixlocal
  use mp, only: proc0, barrier
  implicit none
  logical :: response_was_read
  real :: ts,te
  !Exit if we've already initialised
  if(initialised) return
  if(.not.reinit)then
     !Use the fieldmat init routine to create fieldmat
     !structure and fill some of the basic variables
     !if(debug)call barrier
     if(proc0.and.debug) then
        write(dlun,'("Initialising fieldmat.")')
        call cpu_time(ts)
     endif
     call fieldmat%init
     !if(debug)call barrier
     if(proc0.and.debug) then
        call cpu_time(te)
        write(dlun,'("--Done in ",F12.4," units")') te-ts
     endif
     !Now initialise the parallel decomposition object
     !Note: We don't actually decompose until we make some
     !of the primary subcom so we know how many procs are
     !available to each object etc.
     !if(debug)call barrier
     if(proc0.and.debug) then
        write(dlun,'("Initialising pc.")')
        call cpu_time(ts)
     endif
     call pc%init
     !if(debug)call barrier
     if(proc0.and.debug) then
        call cpu_time(te)
        write(dlun,'("--Done in ",F12.4," units")') te-ts
     endif
     !Now use the pc object to set field object locality
     !and make the primary subcommunicators
     !if(debug)call barrier
     if(proc0.and.debug) then
        write(dlun,'("Setting initial locality and making primary subcommunicators.")')
        call cpu_time(ts)
     endif
     call fieldmat%set_is_local
     call fieldmat%make_subcom_1
     !if(debug)call barrier
     if(proc0.and.debug) then
        call cpu_time(te)
        write(dlun,'("--Done in ",F12.4," units")') te-ts
     endif
     !Now we can setup the decomposition, which basically
     !consists of setting the row limit of row block objects
     !if(debug)call barrier
     if(proc0.and.debug) then
        write(dlun,'("Initialising decomposition.")')
        call cpu_time(ts)
     endif
     call pc%make_decomp
     !if(debug)call barrier
     if(proc0.and.debug) then
        call cpu_time(te)
        write(dlun,'("--Done in ",F12.4," units")') te-ts
     endif
     !Now set fieldmat locality and allocate data arrays
     !if(debug)call barrier
     if(proc0.and.debug) then
        write(dlun,'("Setting locality and allocating.")')
        call cpu_time(ts)
     endif
     call fieldmat%set_is_local
     call fieldmat%allocate
     !if(debug)call barrier
     if(proc0.and.debug) then
        call cpu_time(te)
        write(dlun,'("--Done in ",F12.4," units")') te-ts
     endif
     !Need to make cell/supercell level sub-communicators
     !at some point before we try to invert.
     !Can make them at any point after locality defined
     !/Note that to split a sub-comm everybody in the original
     !communicator needs to make the call. We can either split
     !all groups off from WORLD or we can take "nested subsets"
     !Not sure if there is any performance benefit in the actual
     !comms (i'd guess not) but it should be faster to split smaller