c_redist_33_inv_mpi_copy_nonblock

private subroutine c_redist_33_inv_mpi_copy_nonblock(r, from_here, to_here)

Uses

- mp

This routine does redistributes using entirely non-blocking comms.

Note

This routine may use more memory than blocking approach --> Can perform slower than blocking routine when redistribute is particularly large, else typically gives better performance

Arguments

Type	Intent	Attributes		Name
type(redist_type),	intent(inout)		::	r
complex,	intent(in),	dimension ( r%to_low(1):, r%to_low(2):, r%to_low(3): )	::	from_here
complex,	intent(inout),	dimension ( r%from_low(1):, r%from_low(2):, r%from_low(3): )	::	to_here

Source Code

c_redist_33_inv_mpi_copy_nonblock

Source Code

  subroutine c_redist_33_inv_mpi_copy_nonblock(r, from_here, to_here)
    use mp, only: iproc, send, receive
    use mp, only: barrier,waitall,nbsend,nbrecv
    implicit none
    type (redist_type), intent (in out) :: r
    complex, dimension ( &
         r%from_low(1):, &
         r%from_low(2):, &
         r%from_low(3):  &
         ), intent (in out) :: to_here
    complex, dimension (&
         r%to_low(1):,  &
         r%to_low(2):,  &
         r%to_low(3):   &
         ), intent (in) :: from_here
    complex, dimension(:, :), allocatable :: r_buff, s_buff
    integer, dimension(:), allocatable :: recvhand, sendhand
    integer :: i, ip, nn, nrecv, nsend, j

    !Work out how big recv buffer has to be and allocate it
    ! Note that we use redistribute type's nsend (not nrecv) because we are
    ! doing the inverse transform.
    nrecv = r%nsend
    if (nrecv > 0) then
       allocate( r_buff(size(r%complex_buff), nrecv))
       allocate( recvhand(nrecv))
    end if

    !Now work out how big send buffer has to be, allocate it and fill it
    ! Note that we use redistribute type's nrecv (not nsend) because we are
    ! doing the inverse transform.
    nsend = r%nrecv
    if (nsend > 0) then
       allocate( s_buff(size(r%complex_buff), nsend))
       allocate( sendhand(nsend))
       !$OMP PARALLEL DEFAULT(none) &
       !$OMP PRIVATE(i, ip, nn, j) &
       !$OMP SHARED(nsend, s_buff, r, from_here, iproc, sendhand)
       do i = 1, nsend
          ip = r%recv_ip(i)
          nn = r%to(ip)%nn
          !Any benefit in making this a pointer assignment instead?
          !Avoids copying data here but could lead to questionable
          !memory access during the actual send?
          !$OMP DO &
          !$OMP SCHEDULE(static)
          do j = 1, nn
             s_buff(j, i) = from_here( &
                  r%to(ip)%k(j),       &
                  r%to(ip)%l(j),       &
                  r%to(ip)%m(j)        &
                  )
          end do
          !$OMP END DO

          !$OMP MASTER
          call nbsend(s_buff(1:nn, i), nn, ip, iproc, sendhand(i))
          !$OMP END MASTER
       end do
       !$OMP END PARALLEL
    end if

    !Post receives
    if (nrecv > 0) then
       !$OMP PARALLEL DO DEFAULT(none) &
       !$OMP PRIVATE(i, ip, nn) &
       !$OMP SHARED(nrecv, r_buff, r, recvhand) &
       !$OMP SCHEDULE(static)
       do i = 1, nrecv
          ip = r%recv_inv_ip(i)
          nn = r%from(ip)%nn
          call nbrecv(r_buff(1:nn, i), nn, ip, ip, recvhand(i))
       end do
       !$OMP END PARALLEL DO
    end if

    if (nrecv > 0) then
       !Now we don't have anything to do until all the data has arrived (so should we post sends first?)
       !We could do a wait all but may be better to keep on checking each message seperately so that when
       !it arrives we can copy out whilst waiting for others to arrive. 
       !Start with a wait all
       call waitall(nrecv, recvhand)

       !Now unpack data
       !$OMP PARALLEL DEFAULT(none) &
       !$OMP PRIVATE(i, ip, nn, j) &
       !$OMP SHARED(nrecv, r, to_here, r_buff)
       do i = 1, nrecv
          !Which processor is this data from
          ip = r%recv_inv_ip(i)
          nn = r%from(ip)%nn
          !$OMP DO &
          !$OMP SCHEDULE(static)
          do j = 1, nn
             to_here(              &
                  r%from(ip)%k(j), &
                  r%from(ip)%l(j), &
                  r%from(ip)%m(j)  &
                  ) = r_buff(j, i)
          enddo
          !$OMP END DO NOWAIT
       enddo
       !$OMP END PARALLEL

       deallocate(r_buff)
       deallocate(recvhand)
    endif

    if (nsend > 0) then
       call waitall(nsend, sendhand)
       deallocate(s_buff)
       deallocate(sendhand)
    endif

  end subroutine c_redist_33_inv_mpi_copy_nonblock

c_redist_33_inv_mpi_copy_nonblock Subroutine

Contents

Source Code

private subroutine c_redist_33_inv_mpi_copy_nonblock(r, from_here, to_here)

Uses

Note

Arguments

Contents

Source Code

Source Code