Redistribute initial fields from gf_lo to g_lo. These are fields that have not been reduced to the supercell heads, just calculated on each process that owns gf_lo points
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(fieldmat_type), | intent(inout) | :: | self | |||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_phnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_apnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_bpnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_ph | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_ap | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | gf_bp | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | phnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | apnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | bpnew | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | ph | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | ap | ||
complex, | intent(inout), | dimension(-ntgrid:,:,:) | :: | bp |
subroutine fm_gather_init_fields(self,gf_phnew,gf_apnew,gf_bpnew,gf_ph,gf_ap,gf_bp,phnew,apnew,bpnew,ph,ap,bp)
use theta_grid, only: ntgrid
use kt_grids, only: naky, ntheta0, kwork_filter
use mp, only: nbsend, nbrecv, waitall, broadcast_sub, sum_reduce_sub, proc0
use mp, only: rank_comm, comm_type, initialise_requests, iproc
use run_parameters, only: has_phi, has_apar, has_bpar
use gs2_layouts, only: gf_lo, g_lo, proc_id, idx, ik_idx, it_idx
implicit none
class(fieldmat_type), intent(inout) :: self
!AJ We have two sets of field arrays; ph,ap,bp store the data we hold in g_lo (i.e.
!AJ the field points we have in the g_lo decomposition), and gf_ph,gf_ap,gf_bp hold
!AJ the field points we have in the gf_lo decomposition. This was done for safety (to
!AJ ensure that updating fields in gf_lo or g_lo did not interfere with each other), however
!AJ I THINK it should be possible to do this in a single array.
complex, dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp,gf_ph,gf_ap,gf_bp
complex, dimension(-ntgrid:,:,:), intent(inout) :: phnew,apnew,bpnew,gf_phnew,gf_apnew,gf_bpnew
integer :: is, ic
integer :: ik, it, ikit, sendreqnum, recvreqnum, sender, receiver, tag
integer, dimension (:), allocatable :: recv_handles, send_handles
integer :: tempfield, local_iproc
complex, dimension(:,:,:,:), allocatable :: tempdata
allocate(tempdata(-ntgrid:ntgrid,nfield,ntheta0,naky))
!AJ At this point each process has the correct initialised fields in gf_lo (i.e. the points it owns in gf_lo).
!AJ what we want to do in this routine is distribute them so that all those processes who are part of the supercell
!AJ in gf_lo have the data, and all the processes that own the points in g_lo also have the data.
allocate(recv_handles(naky*ntheta0))
allocate(send_handles(naky*ntheta0))
call initialise_requests(send_handles)
call initialise_requests(recv_handles)
!AJ The first thing we do is send the data from the owner in gf_lo to the supercell head in gf_lo. This
!AJ will enable us to distribute it to the whole supercell in gf_lo, and then after that to the processes
!AJ that need it in g_lo
tempdata = 0.0
recvreqnum = 1
sendreqnum = 1
do ik=1,self%naky
if(self%kyb(ik)%is_empty) cycle
do is=1,self%kyb(ik)%nsupercell
if(self%kyb(ik)%supercells(is)%is_empty) cycle
receiver = self%kyb(ik)%supercells(is)%head_iproc_world
do ic=1,self%kyb(ik)%supercells(is)%ncell
it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
sender = proc_id(gf_lo,idx(gf_lo,ik,it))
tag = ik*self%ntheta0 + it
if(sender .eq. iproc .and. receiver .ne. iproc) then
tempfield = 1
if(has_phi) then
tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
!AJ This copy ensures that the field data is also up to date in the gf_lo data structures
!AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
!AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
gf_ph(:,it,ik) = phnew(:,it,ik)
gf_phnew(:,it,ik) = phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
!AJ This copy ensures that the field data is also up to date in the gf_lo data structures
!AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
!AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
gf_ap(:,it,ik) = apnew(:,it,ik)
gf_apnew(:,it,ik) = apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
!AJ This copy ensures that the field data is also up to date in the gf_lo data structures
!AJ (the implicit field initialise calculates it in the g_lo data structures. This could be
!AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
gf_bp(:,it,ik) = bpnew(:,it,ik)
gf_bpnew(:,it,ik) = bpnew(:,it,ik)
end if
call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
sendreqnum = sendreqnum + 1
else if(receiver .eq. iproc .and. sender .ne. iproc) then
call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
recvreqnum = recvreqnum + 1
!AJ Both the sender and receiver are the same process, so this process
!AJ already has the correct gf_lo field data, but it is currently stored
!AJ in the g_lo arrays (phi,apar,bpar) instead of the gf_lo arrays
!AJ (gf_phi, gf_apar, gf_bpar). Copy it across so it is in the correct array.
else if(receiver .eq. iproc .and. sender .eq. iproc) then
tempfield = 1
if(has_phi) then
gf_phnew(:,it,ik) = phnew(:,it,ik)
gf_ph(:,it,ik) = phnew(:,it,ik)
!AJ This copy is necessary so the data is in the correct place for the next
!AJ communication (the sending from supercell head to the first proc in the
!AJ g_lo list).
tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
gf_apnew(:,it,ik) = apnew(:,it,ik)
gf_ap(:,it,ik) = apnew(:,it,ik)
!AJ This copy is necessary so the data is in the correct place for the next
!AJ communication (the sending from supercell head to the first proc in the
!AJ g_lo list).
tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
gf_bpnew(:,it,ik) = bpnew(:,it,ik)
gf_bp(:,it,ik) = bpnew(:,it,ik)
!AJ This copy is necessary so the data is in the correct place for the next
!AJ communication (the sending from supercell head to the first proc in the
!AJ g_lo list).
tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
end if
end if
end do
end do
end do
call waitall(recvreqnum-1,recv_handles)
do ik=1,self%naky
if(self%kyb(ik)%is_empty) cycle
do is=1,self%kyb(ik)%nsupercell
if(self%kyb(ik)%supercells(is)%is_empty) cycle
if(.not.self%kyb(ik)%supercells(is)%is_head) cycle
do ic=1,self%kyb(ik)%supercells(is)%ncell
it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
sender = proc_id(gf_lo,idx(gf_lo,ik,it))
if(sender .ne. iproc) then
tempfield = 1
if(has_phi) then
gf_phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
gf_ph(:,it,ik) = gf_phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
gf_apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
gf_ap(:,it,ik) = gf_apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
gf_bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
gf_bp(:,it,ik) = gf_bpnew(:,it,ik)
end if
end if
end do
end do
end do
call waitall(sendreqnum-1,send_handles)
!AJ Now we collect all the data from the supercell heads to proc0 for diagnostics. If
!AJ we implemented distributed diagnostics then this would not be necessary.
if(self%fm_sub_headsc_p0%nproc.gt.0)then
call sum_reduce_sub(tempdata,0,self%fm_sub_headsc_p0)
!AJ proc0 unpacks the received field data and updates it's fields from it.
!AJ Because proc0 now has all the field data it can update both the gf_lo
!AJ fields and the g_lo fields.
if(proc0) then
tempfield = 1
if(has_phi) then
ph = tempdata(:,tempfield,:,:)
phnew = ph
gf_ph = ph
gf_phnew = ph
tempfield = tempfield + 1
end if
if(has_apar) then
ap = tempdata(:,tempfield,:,:)
apnew = ap
gf_ap = ap
gf_apnew = ap
tempfield = tempfield + 1
end if
if(has_bpar) then
bp = tempdata(:,tempfield,:,:)
bpnew = bp
gf_bp = bp
gf_bpnew = bp
end if
end if
end if
call initialise_requests(send_handles)
call initialise_requests(recv_handles)
!AJ Now send the data from the supercell head in gf_lo to the first process in the list of owners for
!AJ the same point in g_lo
recvreqnum = 1
do ikit=1, g_lo%ikitrange
ik=g_lo%local_ikit_points(ikit)%ik
it=g_lo%local_ikit_points(ikit)%it
if(kwork_filter(it,ik)) cycle
sender = self%heads(it,ik)
if(sender .ne. iproc .and. .not. proc0) then
if(g_lo%ikit_procs_assignment(it,ik)%proc_list(1) .eq. iproc) then
tag = ik*self%ntheta0 + it
call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))
recvreqnum = recvreqnum + 1
end if
end if
end do
sendreqnum = 1
do ik=1,self%naky
if(self%kyb(ik)%is_empty) cycle
do is=1,self%kyb(ik)%nsupercell
if(self%kyb(ik)%supercells(is)%is_empty) cycle
if(self%kyb(ik)%supercells(is)%is_head) then
do ic=1,self%kyb(ik)%supercells(is)%ncell
it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
if(kwork_filter(it,ik)) cycle
receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
if(receiver .ne. iproc .and. receiver .ne. 0) then
tag = ik*self%ntheta0 + it
call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
sendreqnum = sendreqnum + 1
!AJ If I'm to be sending to myself then just copy the data from gf_lo arrays (where
!AJ is was put in the communication to get it to supercell heads) to the correct g_lo arrays.
else if(receiver .eq. iproc .and. .not. proc0) then
if(has_phi) then
phnew(:,it,ik) = gf_ph(:,it,ik)
ph(:,it,ik) = phnew(:,it,ik)
end if
if(has_apar) then
apnew(:,it,ik) = gf_ap(:,it,ik)
ap(:,it,ik) = apnew(:,it,ik)
end if
if(has_bpar) then
bpnew(:,it,ik) = gf_bp(:,it,ik)
bp(:,it,ik) = bpnew(:,it,ik)
end if
end if
end do
end if
end do
end do
call waitall(recvreqnum-1,recv_handles)
do ikit=1, g_lo%ikitrange
ik=g_lo%local_ikit_points(ikit)%ik
it=g_lo%local_ikit_points(ikit)%it
if(kwork_filter(it,ik)) cycle
sender = self%heads(it,ik)
receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1)
if(sender .ne. iproc .and. receiver .eq. iproc .and. .not. proc0) then
tempfield = 1
if(has_phi) then
phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
ph(:,it,ik) = phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
ap(:,it,ik) = apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
bp(:,it,ik) = bpnew(:,it,ik)
end if
end if
end do
call waitall(sendreqnum-1,send_handles)
deallocate(send_handles, recv_handles)
!AJ Now get the data from the process in g_lo that has received a given ik,it point to the rest of the
!AJ processes that own it.
!AJ This could be optimised if the xyblock_comm is available, but as a first start the code below works
!AJ and doesn't involve global collectives.
do ik = 1,self%naky
do it = 1,self%ntheta0
if(kwork_filter(it,ik)) cycle
if(g_lo%ikit_procs_assignment(it,ik)%mine) then
if(g_lo%ikit_procs_assignment(it,ik)%num_procs .gt. 1) then
sender = g_lo%ikit_procs_assignment(it,ik)%sub_proc_list(1)
local_iproc = g_lo%ikit_procs_assignment(it,ik)%comm%iproc
!AJ This could be avoided on some proceses as tempdata will already have the correct data
!AJ (providing sender .ne. receiver in the above calls)
if(sender .eq. local_iproc .and. .not. proc0) then
tempfield = 1
if(has_phi) then
tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
end if
end if
call broadcast_sub(tempdata(:,:,it,ik),sender,g_lo%ikit_procs_assignment(it,ik)%comm%id)
if(sender .ne. local_iproc .and. .not. proc0) then
tempfield = 1
if(has_phi) then
phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
ph(:,it,ik) = phnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_apar) then
apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
ap(:,it,ik) = apnew(:,it,ik)
tempfield = tempfield + 1
end if
if(has_bpar) then
bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
bp(:,it,ik) = bpnew(:,it,ik)
end if
end if
end if
end if
end do
end do
deallocate(tempdata)
end subroutine fm_gather_init_fields