!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! FIXME : Add documentation
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer, | intent(in) | :: | ntgrid |
subroutine init_y_fft (ntgrid)
use gs2_layouts, only: xxf_lo, yxf_lo, accel_lo, accelx_lo, dealiasing
use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
#ifdef SHMEM
use shm_mpi3, only : shm_info, shm_alloc
use mp, only : proc0
use gs2_layouts, only: g_lo, accel_lo
#endif
implicit none
integer, intent (in) :: ntgrid
integer :: idx, i
#ifdef SHMEM
integer howmany,stride,n2,n3,idw,nwk,envlen,ts1,ts2
character(len=1) envval
#endif
# if FFT == _FFTW3_
integer :: nb_ffts
#ifdef SHMEM
complex, allocatable :: dummy(:,:)
# endif
# endif
if (initialized_y_fft) return
initialized_y_fft = .true.
if (accel) then
! prepare for dealiasing
allocate (ia (accel_lo%nia))
allocate (iak(accel_lo%nia))
#ifndef SHMEM
allocate ( aidx (accel_lo%llim_proc:accel_lo%ulim_proc))
allocate (ag(-ntgrid:ntgrid,2,accel_lo%llim_proc:accel_lo%ulim_alloc))
#else
allocate ( aidx (accel_lo%llim_node:accel_lo%ulim_node), gidx (accel_lo%llim_node:accel_lo%ulim_node))
call shm_alloc(ag, (/ -ntgrid, ntgrid, 1, 2, accel_lo%llim_proc, accel_lo%ulim_alloc /))
#endif
aidx = .true.
#ifndef SHMEM
do i = accel_lo%llim_proc, accel_lo%ulim_proc
if (dealiasing(accel_lo, i)) cycle
aidx(i) = .false.
end do
do idx = 1, accel_lo%nia
ia (idx) = accelx_lo%llim_proc + (idx-1)*accelx_lo%nxny
iak(idx) = accel_lo%llim_proc + (idx-1)*accel_lo%nxnky
end do
#else
gidx = -1
idx = g_lo%llim_node
do i = accel_lo%llim_node, accel_lo%ulim_node
if (dealiasing(accel_lo, i)) cycle
aidx(i) = .false.
gidx(i) = idx; idx = idx+1
end do
do idx = 1, accel_lo%nia
ia (idx) = accelx_lo%llim_node + (idx-1)*accelx_lo%nxny
iak(idx) = accel_lo%llim_node + (idx-1)*accel_lo%nxnky
end do
#endif
#if FFT == _FFTW3_
!JH FFTW plan creation for the accelerated 2d transforms
call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
(2*accel_lo%ntgrid+1) * 2)
call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
(2*accel_lo%ntgrid+1) * 2)
#ifdef SHMEM
call get_environment_variable("GS2_SHM_2D_PLAN",VALUE=envval,LENGTH=envlen)
if (envlen >0) read(envval,*) use_shm_2d_plan
call get_environment_variable("GS2_SHM_2D_PLAN_BUFF",VALUE=envval, LENGTH=envlen)
if (envlen >0) read(envval,*) use_shm_2d_plan_buff
if (proc0) then
if (use_shm_2d_plan) then
write(*,*)' SHM with 2D plans'
if (use_shm_2d_plan_buff) then
write(*,*)' SHM 2D plans with buffering'
endif
else
write(*,*)' SHM with 1D plans'
endif
endif
call compute_ntgrid_split
if (use_shm_2d_plan) then
n2 = accel_lo%ny
n3 = accel_lo%nx
! attention, trouble if ppn =1
if ( shm_info%size > 1 ) then
if (shm_info%id < shm_info%size / 2) then
idw = shm_info%id
ts2=1
nwk = shm_info%size / 2
else
idw = shm_info%id - shm_info%size / 2
ts2=2
nwk = shm_info%size - shm_info%size / 2! for odd ppn
endif
else
idw = 0
ts2=1
nwk=1
endif
ts1 = -ntgrid + (2*ntgrid+1)/nwk * idw &
+ min(mod(2*ntgrid+1,nwk), idw)
howmany = (2*ntgrid+1)/nwk
if ( idw < mod(2*ntgrid+1,nwk)) howmany = howmany+1
if(shm_info%size == 1) howmany = 2*(2*ntgrid+1)
if ( use_shm_2d_plan_buff) then
stride=howmany
else
stride=2*(2*accel_lo%ntgrid+1)
endif
call init_crfftw (yf_fft_shm, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
! now the inverse
call init_rcfftw (yb_fft_shm, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
! init_... has intent out for plan var
yf_fft_shm%ts1=ts1
yf_fft_shm%ts2=ts2
yb_fft_shm%ts1=ts1
yb_fft_shm%ts2=ts2
else
call init_crfftw(faccel_shmx, FFT_TO_REAL_SPACE, accel_lo%ny, 2*(2*ntgrid+1), .true.)
allocate(dummy( 2*(2*ntgrid+1), accel_lo%ndky * accel_lo%nx))
call init_ccfftw(faccel_shmy, FFT_TO_REAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true., accel_lo%ndky)
deallocate(dummy)
call init_rcfftw(baccel_shmx, FFT_TO_SPECTRAL_SPACE, accelx_lo%ny, 2*(2*ntgrid+1), .true.)
allocate(dummy( 2*(2*ntgrid+1), accelx_lo%ny * accelx_lo%nx))
call init_ccfftw(baccel_shmy, FFT_TO_SPECTRAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true.,accel_lo%ndky)
deallocate(dummy)
end if
#endif
#endif
else
! non-accelerated
allocate (fft(yxf_lo%ny/2+1, yxf_lo%llim_proc:yxf_lo%ulim_alloc))
if (.not.allocated(xxf))then
allocate (xxf(xxf_lo%nx,xxf_lo%llim_proc:xxf_lo%ulim_alloc))
endif
if (.not. xfft_initted) then
#if FFT == _FFTW3_
!JH FFTW plan creation for transform x5d and inverse
! number of ffts to be calculated
!JH 7th December 2011
!JH xxf_lo%ulim_alloc is used here rather than xxf_lo%lulim_proc
!JH because there are situations where xxf_lo%llim_proc is greater
!JH than xxf_lo%ulim_proc and that would create a negative number
!JH of FFTs to be calculated. However, xxf_lo%ulim_alloc is set
!JH to be xxf_lo%llim_proc in this situation, and that will give
!JH 1 FFT to be calculated which the code can correctly undertake.
nb_ffts = xxf_lo%ulim_alloc - xxf_lo%llim_proc + 1
call init_ccfftw (xf_fft, FFT_TO_REAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
call init_ccfftw (xb_fft, FFT_TO_SPECTRAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
#endif
xfft_initted = .true.
end if
#if FFT == _FFTW3_
!JH FFTW plan creation for transform y5d and inverse
! number of ffts to be calculated
!JH 7th December 2011
!JH yxf_lo%ulim_alloc is used here rather than yxf_lo%lulim_proc
!JH because there are situations where yxf_lo%llim_proc is greater
!JH than yxf_lo%ulim_proc and that would create a negative number
!JH of FFTs to be calculated. However, yxf_lo%ulim_alloc is set
!JH to be yxf_lo%llim_proc in this situation, and that will give
!JH 1 FFT to be calculated which the code can correctly undertake.
nb_ffts = yxf_lo%ulim_alloc - yxf_lo%llim_proc + 1
call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, yxf_lo%ny, nb_ffts)
call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE, yxf_lo%ny, nb_ffts)
#endif
end if
#ifdef SHMEM
contains
!> FIXME : Add documentation
subroutine compute_ntgrid_split
implicit none
integer k, ir, idw, nw
! computes only shifts for its ite (base 0)
if (shm_info%size > 1) then
if (shm_info%id < shm_info%size / 2) then
idw = shm_info%id
nw = shm_info%size / 2
ips =1; ipe =1
else
idw = shm_info%id - shm_info%size / 2
nw = shm_info%size - shm_info%size / 2
ips=2;ipe=2
endif
else
idw = 0
ips=1; ipe=2
nw = shm_info%size
end if
k=(2*ntgrid+1)/nw
ir=mod((2*ntgrid+1),nw)
its = idw * k + min(ir, idw)
ite = its + k-1
if ( idw < ir) ite=ite+1
end subroutine compute_ntgrid_split
#endif
end subroutine init_y_fft