c_redist_32_new_copy Subroutine

private subroutine c_redist_32_new_copy(r, from_here, to_here)

Uses

FIXME : Add documentation

Arguments

Type IntentOptional Attributes Name
type(redist_type), intent(inout) :: r
complex, intent(in), dimension (r%from_low(1):, r%from_low(2):, r%from_low(3):) :: from_here
complex, intent(inout), dimension (r%to_low(1):, r%to_low(2):) :: to_here

Contents

Source Code


Source Code

  subroutine c_redist_32_new_copy(r, from_here, to_here)
!=====================================================================
!AJ, June 2011: New code from DCSE project on GS2 Indirect Addressing
!=====================================================================
!
! AJ, June 2011:
!  Modified LOCAL COPY part of c_redist_32 routine, as used by GS2 to 
!  transform g data type to xxf data type.
!  Here we REDUCE indirect addressing and cache load by EXPLOITING 
!  understanding of g and xxf data types in GS2.
!
    use mp, only: iproc
    implicit none
    type (redist_type), intent (in out) :: r

    complex, dimension (r%from_low(1):, &
                        r%from_low(2):, &
                        r%from_low(3):), intent (in) :: from_here

    complex, dimension (r%to_low(1):, &
                        r%to_low(2):), intent (in out) :: to_here

    integer :: i,k,t2,t1,f3,f2,f1,fhigh,thigh,f2max
    real :: nakyrecip

    i = 1
!AJ We want to be able to divide by naky with floating point 
!AJ arithmetic so we need to convert naky to a float (hence
!AJ the need for nakyrecip).  Also, to remove the necessity 
!AJ to have a division in the body of the loop below we are 
!AJ calculating the reciprocal of naky so that instead of 
!AJ dividing by naky we can multiply  by 1/naky.
    nakyrecip = naky
    nakyrecip = 1/nakyrecip
    f2max = r%from_high(2)
!AJ Loop over all local copies from THIS proc (iproc) to THIS proc
    do while(i .le. r%from(iproc)%nn)
!AJ Initialise the inner loop indices.  As the from_here array has 
!AJ 3 indices (c_redist_32 goes from 3 indices to 2 indices in the
!AJ copy) we need two inner loops to be able to move through all 
!AJ the to_here and from_here indices.
!AJ t1 (which equates to the it index) and f3 (which equates to iglo)
!AJ do not change for the iterations of the inner loops.  f2 (isgn) 
!AJ can only be 1 or 2 so the first inner loop is restricted to at 
!AJ most 2 iterations.
       f2 = r%from(iproc)%l(i)
       f3 = r%from(iproc)%m(i)
       t1 = r%to(iproc)%k(i)
!AJ Ensure that isgn (f2) is either 1 or 2.
       do while (f2 .le. f2max)
!AJ Get initial value of f1 (which equates to ig) and t2 (which equates
!AJ to ixxf).
          f1 = r%from(iproc)%k(i)
          t2 = r%to(iproc)%l(i)
!AJ Work out the maximum value ixxf (t2) can have by calculating the range 
!AJ of ixxf that this process owns.  We step through t2 by naky each iteration
!AJ so the line below calculates thigh as the number of ixxf steps for these 
!AJ inner loops of the operation owned by this process.
          thigh = ceiling(((xxf_lo_ulim_proc+1) - t2)*nakyrecip)
!AJ from_here and to_here are calculated for c_redist_32 using ig, isgn, 
!AJ and iglo.  The calculation of the maximum number if ixxf steps above can, 
!AJ therefore, be used to also calculate the maximum number of ig setsp.  THis 
!AJ is what the following line does.
          thigh = thigh + (f1-1)
!AJ Finally check the actual number of inner loop steps by ensuring that the 
!AJ computed maximum bound of ig is not beyond the actual allowed maximum value 
!AJ in r%from_high(1).
          fhigh = min(thigh,r%from_high(1))
          do k = f1,fhigh
             to_here(t1,t2) = from_here(k,f2,f3)
             t2 = t2 + naky
             i = i + 1
          end do
!AJ If at the end of the inner loop we still have theoretical iterations 
!AJ left on ixxf (i.e. the calculated thigh is higher than the allowed 
!AJ maximum of ig) then move to the next isgn.  If there aren't any 
!AJ iterations left then exit the inner loops (the f2 loop).
          if(thigh .gt. r%from_high(1)) then
             f2 = f2 + 1
          else
             f2 = f2max + 1
          end if
       end do
    end do

  end subroutine c_redist_32_new_copy