fm_gather_init_fields Subroutine

private subroutine fm_gather_init_fields(self, gf_phnew, gf_apnew, gf_bpnew, gf_ph, gf_ap, gf_bp, phnew, apnew, bpnew, ph, ap, bp)

Redistribute initial fields from gf_lo to g_lo. These are fields that have not been reduced to the supercell heads, just calculated on each process that owns gf_lo points

Type Bound

fieldmat_type

Arguments

Type IntentOptional Attributes Name
class(fieldmat_type), intent(inout) :: self
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_phnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_apnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_bpnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_ph
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_ap
complex, intent(inout), dimension(-ntgrid:,:,:) :: gf_bp
complex, intent(inout), dimension(-ntgrid:,:,:) :: phnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: apnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: bpnew
complex, intent(inout), dimension(-ntgrid:,:,:) :: ph
complex, intent(inout), dimension(-ntgrid:,:,:) :: ap
complex, intent(inout), dimension(-ntgrid:,:,:) :: bp

Contents

Source Code


Source Code

  subroutine fm_gather_init_fields(self,gf_phnew,gf_apnew,gf_bpnew,gf_ph,gf_ap,gf_bp,phnew,apnew,bpnew,ph,ap,bp)
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0, kwork_filter
    use mp, only: nbsend, nbrecv, waitall, broadcast_sub, sum_reduce_sub, proc0
    use mp, only: rank_comm, comm_type, initialise_requests, iproc
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: gf_lo, g_lo, proc_id, idx, ik_idx, it_idx
    implicit none
    class(fieldmat_type), intent(inout) :: self
    !AJ We have two sets of field arrays; ph,ap,bp store the data we hold in g_lo (i.e. 
    !AJ the field points we have in the g_lo decomposition), and gf_ph,gf_ap,gf_bp hold 
    !AJ the field points we have in the gf_lo decomposition.  This was done for safety (to 
    !AJ ensure that updating fields in gf_lo or g_lo did not interfere with each other), however
    !AJ I THINK it should be possible to do this in a single array.
    complex, dimension(-ntgrid:,:,:), intent(inout) :: ph,ap,bp,gf_ph,gf_ap,gf_bp
    complex, dimension(-ntgrid:,:,:), intent(inout) :: phnew,apnew,bpnew,gf_phnew,gf_apnew,gf_bpnew
    integer :: is, ic
    integer :: ik, it, ikit, sendreqnum, recvreqnum, sender, receiver, tag
    integer, dimension (:), allocatable :: recv_handles, send_handles
    integer :: tempfield, local_iproc
    complex, dimension(:,:,:,:), allocatable :: tempdata

    allocate(tempdata(-ntgrid:ntgrid,nfield,ntheta0,naky))
    !AJ At this point each process has the correct initialised fields in gf_lo (i.e. the points it owns in gf_lo).
    !AJ what we want to do in this routine is distribute them so that all those processes who are part of the supercell
    !AJ in gf_lo have the data, and all the processes that own the points in g_lo also have the data.

    allocate(recv_handles(naky*ntheta0))
    allocate(send_handles(naky*ntheta0))

    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)

    !AJ The first thing we do is send the data from the owner in gf_lo to the supercell head in gf_lo.  This 
    !AJ will enable us to distribute it to the whole supercell in gf_lo, and then after that to the processes 
    !AJ that need it in g_lo

    tempdata = 0.0

    recvreqnum = 1
    sendreqnum = 1
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          receiver = self%kyb(ik)%supercells(is)%head_iproc_world
          do ic=1,self%kyb(ik)%supercells(is)%ncell                
             it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
             sender = proc_id(gf_lo,idx(gf_lo,ik,it))
             tag = ik*self%ntheta0 + it
             if(sender .eq. iproc .and. receiver .ne. iproc) then
                tempfield = 1
                if(has_phi) then
                   tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures 
                   !AJ (the implicit field initialise calculates it in the g_lo data structures.  This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_ph(:,it,ik) = phnew(:,it,ik)
                   gf_phnew(:,it,ik) = phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures 
                   !AJ (the implicit field initialise calculates it in the g_lo data structures.  This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_ap(:,it,ik) = apnew(:,it,ik)
                   gf_apnew(:,it,ik) = apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                   !AJ This copy ensures that the field data is also up to date in the gf_lo data structures 
                   !AJ (the implicit field initialise calculates it in the g_lo data structures.  This could be
                   !AJ fixed in fields_implicit init_allfields_implicit making this copy redundant)
                   gf_bp(:,it,ik) = bpnew(:,it,ik)
                   gf_bpnew(:,it,ik) = bpnew(:,it,ik)
                end if
                call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                sendreqnum = sendreqnum + 1
             else if(receiver .eq. iproc .and. sender .ne. iproc) then
                call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))                
                recvreqnum = recvreqnum + 1
                !AJ Both the sender and receiver are the same process, so this process 
                !AJ already has the correct gf_lo field data, but it is currently stored 
                !AJ in the g_lo arrays (phi,apar,bpar) instead of the gf_lo arrays 
                !AJ (gf_phi, gf_apar, gf_bpar).  Copy it across so it is in the correct array.
             else if(receiver .eq. iproc .and. sender .eq. iproc) then
                tempfield = 1
                if(has_phi) then
                   gf_phnew(:,it,ik) = phnew(:,it,ik)
                   gf_ph(:,it,ik) = phnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next 
                   !AJ communication (the sending from supercell head to the first proc in the 
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   gf_apnew(:,it,ik) = apnew(:,it,ik)
                   gf_ap(:,it,ik) = apnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next 
                   !AJ communication (the sending from supercell head to the first proc in the 
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   gf_bpnew(:,it,ik) = bpnew(:,it,ik)
                   gf_bp(:,it,ik) = bpnew(:,it,ik)
                   !AJ This copy is necessary so the data is in the correct place for the next 
                   !AJ communication (the sending from supercell head to the first proc in the 
                   !AJ g_lo list).
                   tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                end if
             end if
          end do
       end do
    end do


    call waitall(recvreqnum-1,recv_handles)
    
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          if(.not.self%kyb(ik)%supercells(is)%is_head) cycle
          do ic=1,self%kyb(ik)%supercells(is)%ncell                
             it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
             sender = proc_id(gf_lo,idx(gf_lo,ik,it))
             if(sender .ne. iproc) then
                tempfield = 1
                if(has_phi) then
                   gf_phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_ph(:,it,ik) = gf_phnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_apar) then
                   gf_apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_ap(:,it,ik) = gf_apnew(:,it,ik)
                   tempfield = tempfield + 1
                end if
                if(has_bpar) then
                   gf_bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                   gf_bp(:,it,ik) = gf_bpnew(:,it,ik)
                end if
             end if
          end do
       end do
    end do
    
    call waitall(sendreqnum-1,send_handles)


    !AJ Now we collect all the data from the supercell heads to proc0 for diagnostics.  If
    !AJ we implemented distributed diagnostics then this would not be necessary.    
    if(self%fm_sub_headsc_p0%nproc.gt.0)then
       call sum_reduce_sub(tempdata,0,self%fm_sub_headsc_p0)
       
       !AJ proc0 unpacks the received field data and updates it's fields from it.
       !AJ Because proc0 now has all the field data it can update both the gf_lo
       !AJ fields and the g_lo fields.
       if(proc0) then
          
          tempfield = 1
          if(has_phi) then
             ph = tempdata(:,tempfield,:,:)
             phnew = ph
             gf_ph = ph
             gf_phnew = ph
             tempfield = tempfield + 1
          end if
             
          if(has_apar) then
             ap = tempdata(:,tempfield,:,:)
             apnew = ap 
             gf_ap = ap
             gf_apnew = ap
             tempfield = tempfield + 1
          end if
          
          if(has_bpar) then
             bp = tempdata(:,tempfield,:,:)
             bpnew = bp
             gf_bp = bp
             gf_bpnew = bp
          end if
          
       end if
       
    end if

       
    call initialise_requests(send_handles)
    call initialise_requests(recv_handles)
    
    !AJ Now send the data from the supercell head in gf_lo to the first process in the list of owners for
    !AJ the same point in g_lo
    recvreqnum = 1
    do ikit=1, g_lo%ikitrange
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       if(sender .ne. iproc .and. .not. proc0) then
          if(g_lo%ikit_procs_assignment(it,ik)%proc_list(1) .eq. iproc) then
             tag = ik*self%ntheta0 + it          
             call nbrecv(tempdata(:,:,it,ik),sender,tag,recv_handles(recvreqnum))             
             recvreqnum = recvreqnum + 1
          end if
       end if
    end do
    
    sendreqnum = 1
    
    do ik=1,self%naky
       if(self%kyb(ik)%is_empty) cycle
       do is=1,self%kyb(ik)%nsupercell
          if(self%kyb(ik)%supercells(is)%is_empty) cycle
          if(self%kyb(ik)%supercells(is)%is_head) then
             do ic=1,self%kyb(ik)%supercells(is)%ncell                
                it=self%kyb(ik)%supercells(is)%cells(ic)%it_ind
                if(kwork_filter(it,ik)) cycle
                receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1) 
                if(receiver .ne. iproc .and. receiver .ne. 0) then
                   tag = ik*self%ntheta0 + it    
                   call nbsend(tempdata(:,:,it,ik),receiver,tag,send_handles(sendreqnum))
                   sendreqnum = sendreqnum + 1
                !AJ If I'm to be sending to myself then just copy the data from gf_lo arrays (where 
                !AJ is was put in the communication to get it to supercell heads) to the correct g_lo arrays.
                else if(receiver .eq. iproc .and. .not. proc0) then
                   if(has_phi) then
                      phnew(:,it,ik) = gf_ph(:,it,ik)
                      ph(:,it,ik) = phnew(:,it,ik)
                   end if
                   if(has_apar) then
                      apnew(:,it,ik) = gf_ap(:,it,ik)
                      ap(:,it,ik) = apnew(:,it,ik)
                   end if
                   if(has_bpar) then
                      bpnew(:,it,ik) = gf_bp(:,it,ik)
                      bp(:,it,ik) = bpnew(:,it,ik)
                   end if
                end if
             end do
          end if
       end do
    end do
    
    call waitall(recvreqnum-1,recv_handles)
    
    do ikit=1, g_lo%ikitrange
       ik=g_lo%local_ikit_points(ikit)%ik
       it=g_lo%local_ikit_points(ikit)%it
       if(kwork_filter(it,ik)) cycle
       sender = self%heads(it,ik)
       receiver = g_lo%ikit_procs_assignment(it,ik)%proc_list(1) 
       if(sender .ne. iproc .and. receiver .eq. iproc .and. .not. proc0) then
          tempfield = 1
          if(has_phi) then
             phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             ph(:,it,ik) = phnew(:,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_apar) then
             apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             ap(:,it,ik) = apnew(:,it,ik)
             tempfield = tempfield + 1
          end if
          if(has_bpar) then
             bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
             bp(:,it,ik) = bpnew(:,it,ik)
          end if
       end if
    end do
    
    call waitall(sendreqnum-1,send_handles)

    deallocate(send_handles, recv_handles)
    
    !AJ Now get the data from the process in g_lo that has received a given ik,it point to the rest of the 
    !AJ processes that own it.
    !AJ This could be optimised if the xyblock_comm is available, but as a first start the code below works
    !AJ and doesn't involve global collectives.
    do ik = 1,self%naky
       do it = 1,self%ntheta0       
          
          if(kwork_filter(it,ik)) cycle

          if(g_lo%ikit_procs_assignment(it,ik)%mine) then

             if(g_lo%ikit_procs_assignment(it,ik)%num_procs .gt. 1) then
                sender = g_lo%ikit_procs_assignment(it,ik)%sub_proc_list(1)
                local_iproc = g_lo%ikit_procs_assignment(it,ik)%comm%iproc
                
                !AJ This could be avoided on some proceses as tempdata will already have the correct data 
                !AJ (providing sender .ne. receiver in the above calls)
                if(sender .eq. local_iproc .and. .not. proc0) then
                   tempfield = 1
                   if(has_phi) then
                      tempdata(:,tempfield,it,ik) = phnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   
                   if(has_apar) then
                      tempdata(:,tempfield,it,ik) = apnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   
                   if(has_bpar) then
                      tempdata(:,tempfield,it,ik) = bpnew(:,it,ik)
                   end if
                   
                end if
                call broadcast_sub(tempdata(:,:,it,ik),sender,g_lo%ikit_procs_assignment(it,ik)%comm%id)
                
                if(sender .ne. local_iproc .and. .not. proc0) then
                   tempfield = 1
                   if(has_phi) then
                      phnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      ph(:,it,ik) = phnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   
                   if(has_apar) then
                      apnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      ap(:,it,ik) = apnew(:,it,ik)
                      tempfield = tempfield + 1
                   end if
                   
                   if(has_bpar) then
                      bpnew(:,it,ik) = tempdata(:,tempfield,it,ik)
                      bp(:,it,ik) = bpnew(:,it,ik)
                   end if
                end if
             end if
          end if
       end do
    end do
    
    deallocate(tempdata)

  end subroutine fm_gather_init_fields