init_y_fft Subroutine

private subroutine init_y_fft(ntgrid)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! FIXME : Add documentation

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: ntgrid

Contents

Source Code


Source Code

  subroutine init_y_fft (ntgrid)
    use gs2_layouts, only: xxf_lo, yxf_lo, accel_lo, accelx_lo, dealiasing
    use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
#ifdef SHMEM
    use shm_mpi3, only : shm_info, shm_alloc
    use mp, only : proc0
    use gs2_layouts, only: g_lo, accel_lo
#endif
    implicit none
    integer, intent (in) :: ntgrid
    
    integer :: idx, i
#ifdef SHMEM
    integer howmany,stride,n2,n3,idw,nwk,envlen,ts1,ts2
    character(len=1) envval
#endif

# if FFT == _FFTW3_
    integer :: nb_ffts
#ifdef SHMEM
    complex, allocatable :: dummy(:,:)
# endif
# endif

    if (initialized_y_fft) return
    initialized_y_fft = .true.

    if (accel) then
       
       ! prepare for dealiasing 
       allocate (ia (accel_lo%nia))
       allocate (iak(accel_lo%nia))
#ifndef SHMEM 
       allocate ( aidx (accel_lo%llim_proc:accel_lo%ulim_proc))
       allocate (ag(-ntgrid:ntgrid,2,accel_lo%llim_proc:accel_lo%ulim_alloc))
#else
       allocate ( aidx (accel_lo%llim_node:accel_lo%ulim_node), gidx (accel_lo%llim_node:accel_lo%ulim_node))
       call shm_alloc(ag, (/ -ntgrid, ntgrid, 1, 2, accel_lo%llim_proc, accel_lo%ulim_alloc /))
#endif
       aidx = .true.

#ifndef SHMEM
       do i = accel_lo%llim_proc, accel_lo%ulim_proc
          if (dealiasing(accel_lo, i)) cycle
          aidx(i) = .false.
       end do
       do idx = 1, accel_lo%nia
          ia (idx) = accelx_lo%llim_proc + (idx-1)*accelx_lo%nxny
          iak(idx) = accel_lo%llim_proc  + (idx-1)*accel_lo%nxnky
       end do
#else
       gidx = -1
       idx = g_lo%llim_node
       do i = accel_lo%llim_node, accel_lo%ulim_node
          if (dealiasing(accel_lo, i)) cycle
          aidx(i) = .false.
          gidx(i) = idx; idx = idx+1
       end do

       do idx = 1, accel_lo%nia
          ia (idx) = accelx_lo%llim_node + (idx-1)*accelx_lo%nxny
          iak(idx) = accel_lo%llim_node  + (idx-1)*accel_lo%nxnky
       end do
#endif

#if FFT == _FFTW3_
       !JH FFTW plan creation for the accelerated 2d transforms
       call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
            (2*accel_lo%ntgrid+1) * 2)
       call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
            (2*accel_lo%ntgrid+1) * 2)
#ifdef SHMEM
       call get_environment_variable("GS2_SHM_2D_PLAN",VALUE=envval,LENGTH=envlen)
       if (envlen >0) read(envval,*) use_shm_2d_plan
       call get_environment_variable("GS2_SHM_2D_PLAN_BUFF",VALUE=envval, LENGTH=envlen)
       if (envlen >0) read(envval,*) use_shm_2d_plan_buff
       if (proc0) then
          if (use_shm_2d_plan) then
             write(*,*)' SHM with 2D plans'      
             if (use_shm_2d_plan_buff) then 
                write(*,*)' SHM 2D plans with buffering'
             endif
          else 
             write(*,*)' SHM with 1D plans'
          endif
       endif
       
       call compute_ntgrid_split
       
       if (use_shm_2d_plan) then
          n2 = accel_lo%ny
          n3 = accel_lo%nx
          
          ! attention, trouble if ppn =1
          if ( shm_info%size > 1 ) then
             if (shm_info%id < shm_info%size / 2) then
                idw = shm_info%id
                ts2=1
                nwk = shm_info%size / 2
             else
                idw = shm_info%id - shm_info%size / 2
                ts2=2
                nwk = shm_info%size - shm_info%size / 2! for odd ppn 
             endif
          else
             idw = 0
             ts2=1
             nwk=1
          endif
          ts1 = -ntgrid + (2*ntgrid+1)/nwk * idw &
               + min(mod(2*ntgrid+1,nwk), idw)
          howmany =  (2*ntgrid+1)/nwk
          if ( idw < mod(2*ntgrid+1,nwk)) howmany = howmany+1
          if(shm_info%size == 1) howmany = 2*(2*ntgrid+1)

          if ( use_shm_2d_plan_buff) then
             stride=howmany
          else
             stride=2*(2*accel_lo%ntgrid+1)
          endif

          call init_crfftw (yf_fft_shm, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
               howmany, stride) !(2*accel_lo%ntgrid+1) * 2)

          ! now the inverse        
          call init_rcfftw (yb_fft_shm, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
               howmany, stride) !(2*accel_lo%ntgrid+1) * 2)

          ! init_... has intent out for plan var
          yf_fft_shm%ts1=ts1
          yf_fft_shm%ts2=ts2
          yb_fft_shm%ts1=ts1
          yb_fft_shm%ts2=ts2

       else
          call init_crfftw(faccel_shmx, FFT_TO_REAL_SPACE, accel_lo%ny, 2*(2*ntgrid+1), .true.)
          allocate(dummy( 2*(2*ntgrid+1), accel_lo%ndky * accel_lo%nx))
          call init_ccfftw(faccel_shmy, FFT_TO_REAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true., accel_lo%ndky)
          deallocate(dummy)
          call init_rcfftw(baccel_shmx, FFT_TO_SPECTRAL_SPACE, accelx_lo%ny, 2*(2*ntgrid+1), .true.)
          allocate(dummy( 2*(2*ntgrid+1), accelx_lo%ny * accelx_lo%nx))
          call init_ccfftw(baccel_shmy, FFT_TO_SPECTRAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true.,accel_lo%ndky)
          deallocate(dummy)
       end if
#endif

#endif
    else
       ! non-accelerated
       allocate (fft(yxf_lo%ny/2+1, yxf_lo%llim_proc:yxf_lo%ulim_alloc))
       if (.not.allocated(xxf))then
          allocate (xxf(xxf_lo%nx,xxf_lo%llim_proc:xxf_lo%ulim_alloc))
       endif

       if (.not. xfft_initted) then
#if FFT == _FFTW3_
          !JH FFTW plan creation for transform x5d and inverse
          ! number of ffts to be calculated
          !JH 7th December 2011
          !JH xxf_lo%ulim_alloc is used here rather than xxf_lo%lulim_proc
          !JH because there are situations where xxf_lo%llim_proc is greater 
          !JH than xxf_lo%ulim_proc and that would create a negative number 
          !JH of FFTs to be calculated.  However, xxf_lo%ulim_alloc is set
          !JH to be xxf_lo%llim_proc in this situation, and that will give 
          !JH 1 FFT to be calculated which the code can correctly undertake.
          nb_ffts = xxf_lo%ulim_alloc - xxf_lo%llim_proc + 1

          call init_ccfftw (xf_fft, FFT_TO_REAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
          call init_ccfftw (xb_fft, FFT_TO_SPECTRAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
#endif
          xfft_initted = .true.
       end if

#if FFT == _FFTW3_
       !JH FFTW plan creation for transform y5d and inverse
       ! number of ffts to be calculated
       !JH 7th December 2011
       !JH yxf_lo%ulim_alloc is used here rather than yxf_lo%lulim_proc
       !JH because there are situations where yxf_lo%llim_proc is greater 
       !JH than yxf_lo%ulim_proc and that would create a negative number 
       !JH of FFTs to be calculated.  However, yxf_lo%ulim_alloc is set
       !JH to be yxf_lo%llim_proc in this situation, and that will give 
       !JH 1 FFT to be calculated which the code can correctly undertake.
       nb_ffts = yxf_lo%ulim_alloc - yxf_lo%llim_proc + 1

       call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, yxf_lo%ny, nb_ffts)
       call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE,  yxf_lo%ny, nb_ffts)
#endif
    end if

#ifdef SHMEM
  contains
    !> FIXME : Add documentation
    subroutine compute_ntgrid_split
      implicit none
      integer k, ir, idw, nw

      ! computes only shifts for its ite (base 0)

      if (shm_info%size > 1) then
         if (shm_info%id < shm_info%size / 2) then
            idw = shm_info%id
            nw  = shm_info%size / 2
            ips =1; ipe =1
         else
            idw = shm_info%id - shm_info%size / 2
            nw = shm_info%size - shm_info%size / 2
            ips=2;ipe=2
         endif
      else
         idw = 0
         ips=1; ipe=2
         nw = shm_info%size
      end if
      k=(2*ntgrid+1)/nw
      ir=mod((2*ntgrid+1),nw)
      its = idw * k + min(ir, idw) 
      ite = its + k-1
      if ( idw < ir) ite=ite+1
    end subroutine compute_ntgrid_split
#endif   

  end subroutine init_y_fft