pc_decomp_owncells_simplempi Subroutine

private subroutine pc_decomp_owncells_simplempi(self, fieldmat)

Uses

Cells are decomposed based on proc || Decomp_type=3

Type Bound

pc_type

Arguments

Type IntentOptional Attributes Name
class(pc_type), intent(inout) :: self
class(fieldmat_type), intent(inout) :: fieldmat

Contents


Source Code

  subroutine pc_decomp_owncells_simplempi(self, fieldmat)
    use mp, only: comm_type, sum_allreduce_sub
    use sorting, only: quicksort
    implicit none
    class(pc_type), intent(inout) :: self
    class(fieldmat_type), intent(in out) :: fieldmat
    integer :: ik, is, ic, ifq, it, ip, llim, ulim
    integer :: nrow_tmp, nrow_loc, remainder, np
    integer, dimension(:), allocatable :: nwork, iproc_work
    type(comm_type) :: supercell_comm
    logical :: cell_is_local

    !/Don't need to change pc%is_local as this is already correct

    !Make sure we do the invert locally as mpi based version not implemented!
    self%force_local_invert=.true.

    !Now we say that we can use no gather based getfieldeq
    self%has_to_gather=.false.

    !Now we (re)calculate how much data is available
    call self%count_avail(fieldmat)

    !Initialise the number of rows responsible
    self%nresp_per_cell=0

    !Now loop over all cells, setting the row_llim of
    !their row blocks
    do ik=1,fieldmat%naky
       do is=1,fieldmat%kyb(ik)%nsupercell
          nrow_tmp=fieldmat%kyb(ik)%supercells(is)%nrow
          do ic=1,fieldmat%kyb(ik)%supercells(is)%ncell
             ! If we have this supercell then determine which processors have
             ! this cell and decide which processors get which work. We attempt
             ! to distribute work to those processors with the least amount of
             ! work already allocated.
             if(fieldmat%kyb(ik)%supercells(is)%is_local)then
                ! Copy out a couple of values to make the code more readable
                supercell_comm = fieldmat%kyb(ik)%supercells(is)%cells(ic)%parent_sub
                cell_is_local = fieldmat%kyb(ik)%supercells(is)%cells(ic)%is_local

                ! First determine how many processors in the supercell communicator
                ! have this cell
                np = 0
                if (cell_is_local) np = 1

                call sum_allreduce_sub(np, supercell_comm%id)

                ! Now we can determine what the effective processor rank should be for distributing work.
                ! Essentially we just want to number the processors with this cell from 0 to np in order of
                ! how little work they currently have.

                ! First record how much work each processors has, forcing those without this cell to report -ve work
                allocate(nwork(0:supercell_comm%nproc-1))
                nwork = 0

                if (cell_is_local) then
                   nwork(supercell_comm%iproc) = self%current_nresp()
                else
                   nwork(supercell_comm%iproc) = -1
                end if

                ! Now all processors in supercell communicator know how much work each processor has
                ! except those which aren't local and hence can't take part.
                call sum_allreduce_sub(nwork, supercell_comm%id)

                ! Now we have to label the processors in sorted order. To do this we first create an array
                ! of sequential processor ids and we then sort this based on the amount of work, as stored in nwork
                allocate(iproc_work(0:supercell_comm%nproc-1))
                iproc_work(:) = [(ip, ip=0, supercell_comm%nproc-1)]

                ! Now sort iproc_work based on nwork
                call quicksort(supercell_comm%nproc, nwork, iproc_work)

                if (cell_is_local) then
                   ! With 2008 standard can just do:
                   ! ip_expect = findloc(iproc_work, supercell_comm%iproc)
                   ! instead of this loop
                   ! Search the processor list to find where the processor's id is found
                   do ip = 0, supercell_comm%nproc-1
                      if(iproc_work(ip) == supercell_comm%iproc) exit
                   end do

                   ! Here we have to remove any offset in ip arising
                   ! from the processors without this cell. As they
                   ! are assigned -1 work they come at the start of
                   ! the array.
                   ip = ip - count(nwork == -1)
                end if
                deallocate(nwork, iproc_work)
             endif

             !NOTE: By sorting the procs as done above we end up with lots of different procs
             !which are proc0 for at least one supercell. This means that later when we assign
             !the supercell heads we have lots of members, whilst if we try to minimise the number
             !of unique procs which are proc0 for 1 or more supercells then there will be fewer
             !members and the subsequent allreduce will be cheaper.

             !Store it index
             it=fieldmat%kyb(ik)%supercells(is)%cells(ic)%it_ind

             !If local work out limits, if not just set to 0.
             if(fieldmat%kyb(ik)%supercells(is)%cells(ic)%is_local)then

                !Calculate row limits based on cell size, nprocs and
                !proc rank
                !/Simple block size
                nrow_loc=max(ceiling(nrow_tmp*1.0/np),minNRow)
                !By enforcing a minimum value of nrow_loc we can ensure
                !that blocks don't get too small. Note it's easy to work
                !out the limits for any of the other procs *without* comms.
                !Hence should be quite cheap to check how many empty procs
                !we have, how small the smallest block is etc. So shouldn't
                !be too expensive to work out if we should do an extra row
                !or two if it will remove procs with little work.
                remainder=nrow_tmp-nrow_loc*np !Note this should be -ve due to use of ceiling
                
                !Now work out limits
                llim=1+ip*(nrow_loc)
                ulim=llim+nrow_loc-1

                !Now make sure that we don't overstep the cell limits
                if(llim>nrow_tmp) then
                   !If the lower limit is too big then this proc should
                   !be made empty
                   llim=0
                   ulim=0
                else if(ulim>nrow_tmp) then
                   !If just the upper limit is too big then
                   !lower it
                   ulim=nrow_tmp
                endif

                !Now loop over row blocks to set index
                do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
                   fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=llim
                   fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=ulim
                   call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
                enddo

             else
                do ifq=1,fieldmat%kyb(ik)%supercells(is)%cells(ic)%nrb
                   fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_llim=0
                   fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%row_ulim=0
                   call fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(ifq)%set_nrow
                enddo
             endif

             !Record the number of responsible rows, note we assume all rb have same size
             if(size(fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb)>0)then
                self%nresp_per_cell(it,ik)=fieldmat%kyb(ik)%supercells(is)%cells(ic)%rb(1)%nrow
             else
                self%nresp_per_cell(it,ik)=0
             endif
          enddo
       enddo
    enddo
  end subroutine pc_decomp_owncells_simplempi