Cells are decomposed based on proc || Decomp_type=3
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(pc_type), | intent(inout) | :: | self | |||
class(fieldmat_type), | intent(inout) | :: | fieldmat |
subroutine pc_decomp_owncells_simplempi(self, fieldmat)
use mp, only: comm_type, sum_allreduce_sub
use sorting, only: quicksort
implicit none
class(pc_type), intent(inout) :: self
class(fieldmat_type), intent(in out) :: fieldmat
integer :: ik, is, ic, ifq, it, ip, llim, ulim
integer :: nrow_tmp, nrow_loc, remainder, np
integer, dimension(:), allocatable :: nwork, iproc_work
type(comm_type) :: supercell_comm
logical :: cell_is_local
!/Don't need to change pc%is_local as this is already correct
!Make sure we do the invert locally as mpi based version not implemented!
self%force_local_invert=.true.
!Now we say that we can use no gather based getfieldeq
self%has_to_gather=.false.
!Now we (re)calculate how much data is available
call self%count_avail(fieldmat)
!Initialise the number of rows responsible
self%nresp_per_cell=0
!Now loop over all cells, setting the row_llim of
!their row blocks
do ik=1,fieldmat%naky
do is=1,fieldmat%kyb(ik)%nsupercell
nrow_tmp=fieldmat%kyb(ik)%supercells(is)%nrow
do ic=1,fieldmat%kyb(ik)%supercells(is)%ncell
! If we have this supercell then determine which processors have
! this cell and decide which processors get which work. We attempt
! to distribute work to those processors with the least amount of
! work already allocated.
if(fieldmat%kyb(ik)%supercells(is)%is_local)then
! Copy out a couple of values to make the code more readable
supercell_comm = fieldmat%kyb(ik)%supercells(is)%cells(ic)%parent_sub
cell_is_local = fieldmat%kyb(ik)%supercells(is)%cells(ic)%is_local
! First determine how many processors in the supercell communicator
! have this cell
np = 0
if (cell_is_local) np = 1
call sum_allreduce_sub(np, supercell_comm%id)
! Now we can determine what the effective processor rank should be for distributing work.
! Essentially we just want to number the processors with this cell from 0 to np in order of
! how little work they currently have.
! First record how much work each processors has, forcing those without this cell to report -ve work
allocate(nwork(0:supercell_comm%nproc-1))
nwork = 0
if (cell_is_local) then
nwork(supercell_comm%iproc) = self%current_nresp()
else
nwork(supercell_comm%iproc) = -1
end if
! Now all processors in supercell communicator know how much work each processor has
! except those which aren't local and hence can't take part.
call sum_allreduce_sub(nwork, supercell_comm%id)
! Now we have to label the processors in sorted order. To do this we first create an array
! of sequential processor ids and we then sort this based on the amount of work, as stored in nwork
allocate(iproc_work(0:supercell_comm%nproc-1))
iproc_work(:) = [(ip, ip=0, supercell_comm%nproc-1)]
! Now sort iproc_work based on nwork
call quicksort(supercell_comm%nproc, nwork, iproc_work)
if (cell_is_local) then
! With 2008 standard can just do:
! ip_expect = findloc(iproc_work, supercell_comm%iproc)
! instead of this loop
! Search the processor list to find where the processor's id is found
do ip = 0, supercell_comm%nproc-1
if(iproc_work(ip) == supercell_comm%iproc) exit
end do
! Here we have to remove any offset in ip arising
! from the processors without this cell. As they
! are assigned -1 work they come at the start of
! the array.
ip = ip - count(nwork == -1)
end if
deallocate(nwork, iproc_work)
endif
!NOTE: By sorting the procs as done above we end up with lots of different procs
!which are proc0 for at least one supercell. This means that later when we assign
!the supercell heads we have lots of members, whilst if we try to minimise the number
!of unique procs which are proc0 for 1 or more supercells then there will be fewer
!members and the subsequent allreduce will be cheaper.
!Store it index
it=fieldmat%kyb(ik)%supercells(is)%cells(ic)%it_ind
!If local work out limits, if not just set to 0.
if(fieldmat%kyb(ik)%supercells(is)%cells(ic)%is_local)then
!Calculate row limits based on cell size, nprocs and
!proc rank
!/Simple block size
nrow_loc=max(ceiling(nrow_tmp*1.0/np),minNRow)
!By enforcing a minimum value of nrow_loc we can ensure
!that blocks don't get too small. Note it's easy to work
!out the limits for any of the other procs *without* comms.
!Hence should be quite cheap to check how many empty procs
!we have, how small the smallest block is etc. So shouldn't
!be too expensive to work out if we should do an extra row
!or two if it will remove procs with little work.
remainder=nrow_tmp-nrow_loc*np !Note this should be -ve due to use of ceiling
!Now work out limits
llim=1+ip*(nrow_loc)
ulim=llim+nrow_loc-1
!Now make sure that we don't overstep the cell limits
if(llim>nrow_tmp) then
!If the lower limit is too big then this proc should
!be made empty
llim=0
ulim=0
else if(ulim>nrow_tmp) then
!If just the upper limit is too big then
!lower it
ulim=nrow_tmp
endif
!Now loop over row blocks to set index
do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=llim
fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=ulim
call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
enddo
else
do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=0
fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=0
call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
enddo
endif
!Record the number of responsible rows, note we assume all rb have same size
if(size(fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb)>0)then
self%nresp_per_cell(it,ik)=fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(1)%nrow
else
self%nresp_per_cell(it,ik)=0
endif
enddo
enddo
enddo
end subroutine pc_decomp_owncells_simplempi