#include "define.inc" #include "unused_dummy.inc" #if FFT == _FFTW_ #error Support for FFTW2 has been removed, please switch to FFTW3 #endif !> Provides some wrappers for managing FFTW plans. !> !> The initialisation routines ([[init_crfftw]], [[init_rcfftw]], !> [[init_ccfftw]]) create FFTW plans, storing them in an instance of !> [[fft_type]], along with some useful metadata, notably !> [[fft_type:scale]]. module fft_work ! Modifications for using FFTW version 3: ! (c) The Numerical Algorithms Group (NAG) Ltd, 2009 ! on behalf of the HECToR project use, intrinsic:: iso_c_binding, only: C_PTR, C_NULL_PTR #if FFT == _FFTW3_ #ifdef SINGLE_PRECISION use fft_wrapper, only: fftw_plan_many_dft => fftwf_plan_many_dft use fft_wrapper, only: fftw_plan_many_dft_c2r => fftwf_plan_many_dft_c2r use fft_wrapper, only: fftw_plan_many_dft_r2c => fftwf_plan_many_dft_r2c use fft_wrapper, only: fftw_init_threads => fftwf_init_threads use fft_wrapper, only: fftw_plan_with_nthreads => fftwf_plan_with_nthreads use fft_wrapper, only: fftw_destroy_plan => fftwf_destroy_plan use fft_wrapper, only: fftw_cleanup => fftwf_cleanup use fft_wrapper, only: fftw_execute_dft => fftwf_execute_dft use fft_wrapper, only: fftw_execute_dft_r2c => fftwf_execute_dft_r2c use fft_wrapper, only: fftw_execute_dft_c2r => fftwf_execute_dft_c2r use fft_wrapper, only: fftw_export_wisdom_to_filename => fftwf_export_wisdom_to_filename use fft_wrapper, only: fftw_import_wisdom_from_filename => fftwf_import_wisdom_from_filename #else use fft_wrapper, only: fftw_plan_many_dft use fft_wrapper, only: fftw_plan_many_dft_c2r use fft_wrapper, only: fftw_plan_many_dft_r2c use fft_wrapper, only: fftw_init_threads use fft_wrapper, only: fftw_plan_with_nthreads use fft_wrapper, only: fftw_destroy_plan use fft_wrapper, only: fftw_cleanup use fft_wrapper, only: fftw_execute_dft use fft_wrapper, only: fftw_execute_dft_r2c use fft_wrapper, only: fftw_execute_dft_c2r use fft_wrapper, only: fftw_export_wisdom_to_filename use fft_wrapper, only: fftw_import_wisdom_from_filename #endif use fft_wrapper, only: FFTW_PATIENT, FFTW_ESTIMATE, FFTW_UNALIGNED #endif !These are always defined use fft_wrapper, only: FFT_TO_SPECTRAL_SPACE => FFTW_FORWARD use fft_wrapper, only: FFT_TO_REAL_SPACE => FFTW_BACKWARD implicit none private public :: measure_plan, finish_fft_work, time_fft, fft_type public :: init_ccfftw, init_crfftw, init_rcfftw, init_z public :: FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE public :: save_wisdom, load_wisdom interface init_crfftw module procedure init_crfftw_1d module procedure init_crfftw_2d end interface interface init_rcfftw module procedure init_rcfftw_1d module procedure init_rcfftw_2d end interface !> If true, then use FFTW_PATIENT to pick the optimal FFT algorithm, !> otherwise use FFTW_ESTIMATE (potentially slower but more !> reproducible results). FFTW_PATIENT takes longer to set up the !> plan initially, but may be faster overall. logical :: measure_plan = .true. !> Information about a planned FFT transform type :: fft_type !> True if the plan has been created logical :: created = .false. !> Number of points in the array to be transformed integer :: n = 0 !> Direction of the FFT, can be either [[FFT_FORWARD]] or [[FFTW_BACKWARD]] integer :: direction = 0 !> The FFTW plan object type(C_PTR) :: plan = C_NULL_PTR !> How many independent slices in the array to be transformed integer :: howmany = 0 !> True if the array is strided or 2D logical :: strided = .false. #ifdef SHMEM !> start indices for rank partition in first 2 dims integer :: ts1 = -1, ts2 = -1 #endif !> Normalisation factor real :: scale = 0 !> If true, then the result of the FFT needs to be multiplied by !> [[fft_type:scale]]. If false, then the scaling can be skipped, !> saving some work logical :: must_scale = .false. contains procedure, public :: execute_c2c, execute_r2c, execute_c2r ! Unfortunately we can't overload the execute_* routines due to the ! assumed shape which doesn't seem to carry through in interfaces. ! Otherwise generic :: execute => execute_c2c, execute_c2r, execute_r2c procedure, public :: delete => delete_fft procedure, private :: init => basic_init end type fft_type !> Total time taken setting up and performing FFTs real :: time_fft(2) = 0. !> True if this module has been initialised logical :: initialised = .false. contains !> Small wrapper to bind execute c2c to fft_type subroutine execute_c2c(self, array_in, array_out) class(fft_type), intent(in) :: self complex, dimension(*), intent(in out) :: array_in complex, dimension(*), intent(out) :: array_out #if FFT == _FFTW3_ call fftw_execute_dft(self%plan, array_in, array_out) #else UNUSED_DUMMY(self) !Can't do UNUSED_DUMMY on assumed shape arrays if (.false.) write(*,*) array_out(1), array_in(1) #endif end subroutine execute_c2c !> Small wrapper to bind execute r2c to fft_type subroutine execute_r2c(self, array_in, array_out) class(fft_type), intent(in) :: self real, dimension(*), intent(in out) :: array_in complex, dimension(*), intent(out) :: array_out #if FFT == _FFTW3_ call fftw_execute_dft_r2c(self%plan, array_in, array_out) #else UNUSED_DUMMY(self) if (.false.) write(*,*) array_out(1), array_in(1) #endif end subroutine execute_r2c !> Small wrapper to bind execute c2r to fft_type subroutine execute_c2r(self, array_in, array_out) class(fft_type), intent(in) :: self complex, dimension(*), intent(in out) :: array_in real, dimension(*), intent(out) :: array_out #if FFT == _FFTW3_ call fftw_execute_dft_c2r(self%plan, array_in, array_out) #else UNUSED_DUMMY(self) if (.false.) write(*,*) array_out(1), array_in(1) #endif end subroutine execute_c2r !> Destroy FFT plan !> !> This is safe to call on previously destroyed or !> non-constructed/empty plans subroutine delete_fft(self) use job_manage, only: time_message implicit none class(fft_type), intent(in out) :: self !Don't try to delete ffts which have not been created if (.not. self%created) return call time_message(.false., time_fft, ' FFT') #if FFT == _FFTW3_ call fftw_destroy_plan(self%plan) # endif select type (self) type is(fft_type) self = fft_type() end select call time_message(.false., time_fft, ' FFT') end subroutine delete_fft !> Perform any one off setup of the fft library. !> This is currently mostly only needed for builds with !> OpenMP for threaded transforms. subroutine init_fft_work #if FFT == _FFTW3_ #ifdef OPENMP use omp_lib, only: omp_get_num_threads implicit none integer :: ierr, nthreads if (initialised) return initialised = .true. !$OMP PARALLEL nthreads = omp_get_num_threads() !$OMP END PARALLEL ierr = fftw_init_threads() call fftw_plan_with_nthreads(nthreads) #endif #endif end subroutine init_fft_work !> Does some common initialisation for [[fft_type]] subroutine basic_init(self, n, direction, howmany, strided) use mp, only: mp_abort use print_utils, only: to_string class(fft_type), intent(in out) :: self !> Number of points integer, intent(in) :: n !> Direction of FFT, must be either [[FFT_TO_REAL_SPACE]] or [[FFT_TO_SPECTRAL_SPACE]] integer, intent(in) :: direction !> Number of independent FFTs integer, intent(in) :: howmany !> Is array strided/2D logical, intent(in) :: strided if (.not. initialised) call init_fft_work self%n = n self%direction = direction if (direction == FFT_TO_SPECTRAL_SPACE) then self%scale = 1 / real(n) self%must_scale = .true. else if (direction == FFT_TO_REAL_SPACE) then self%scale = 1 self%must_scale = .false. else call mp_abort(& "Unexpected FFT direction, expected one of FFT_TO_REAL_SPACE, " & // "FFT_TO_SPECTRAL_SPACE, got " // to_string(direction)) end if self%howmany = howmany ! Note we don't use strided for anything self%strided = strided ! Note we haven't actually created the plan yet, so this is perhaps a bit ! misleading self%created = .true. end subroutine basic_init # if FFT == _FFTW3_ !> Set flags for FFTW3 pure integer function fftw_flags() if (measure_plan) then fftw_flags = FFTW_PATIENT + FFTW_UNALIGNED else fftw_flags = FFTW_ESTIMATE + FFTW_UNALIGNED endif end function fftw_flags #endif !> FIXME : Add documentation subroutine init_z (fft, direction, n, howmany) use job_manage, only: time_message implicit none type (fft_type), intent (out) :: fft integer, intent (in) :: direction, n integer, intent (in) :: howmany # if FFT == _FFTW3_ complex, dimension (:,:), allocatable :: dummy_in_data, dummy_out_data integer, dimension (1) :: array_n, embed # endif call time_message(.false., time_fft, ' FFT') call fft%init(n, direction, howmany, .false.) #if FFT == _FFTW3_ array_n = n embed = n+1 allocate (dummy_in_data(n+1, howmany), dummy_out_data(n+1, howmany)) fft%plan = fftw_plan_many_dft(1, array_n, howmany, & dummy_in_data, embed, 1, n+1, & dummy_out_data, embed, 1, n+1, & direction, fftw_flags()) deallocate (dummy_in_data, dummy_out_data) # endif call time_message(.false., time_fft, ' FFT') end subroutine init_z !> Create an FFTW plan for complex-to-complex transforms subroutine init_ccfftw (fft, direction, n, howmany, data_array, transpose, m) use mp, only: mp_abort use job_manage, only: time_message use optionals, only: get_option_with_default implicit none !> The created plan type (fft_type), intent (out) :: fft !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]] integer, intent (in) :: direction !> Number of points in array to transform integer, intent (in) :: n !> Number of independent FFTs in array integer, intent (in) :: howmany !> Example array to transform complex, dimension(:,:), intent(inout) :: data_array !> If true, then take FFT of transpose of array logical, optional, intent(in) :: transpose !> Size of transpose direction. Required if `transpose` is true integer, optional, intent(in) :: m # if FFT == _FFTW3_ integer, dimension(1) :: array_n integer :: stride, dist # endif call time_message(.false., time_fft, ' FFT') call fft%init(n, direction, howmany, .false.) #if FFT == _FFTW3_ ! the planer expects this as an array of size 1 array_n = n if (get_option_with_default(transpose, .false.)) then if (.not. present(m)) call mp_abort("Missing argument 'm' for transpose FFT") stride = howmany * m dist = 1 else stride = 1 dist = n end if fft%plan = fftw_plan_many_dft(1, array_n, howmany, & data_array, array_n, stride, dist, & data_array, array_n, stride, dist, & direction, fftw_flags()) #else UNUSED_DUMMY(data_array); UNUSED_DUMMY(transpose); UNUSED_DUMMY(m) # endif call time_message(.false., time_fft, ' FFT') end subroutine init_ccfftw !> Create FFTW plan suitable for 1D real to complex transformation subroutine init_rcfftw_1d (fft, direction, n, howmany, transposed) use job_manage, only: time_message use optionals, only: get_option_with_default implicit none !> The created plan type (fft_type), intent (out) :: fft !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]] integer, intent (in) :: direction !> Number of points in array to transform integer, intent (in) :: n !> Number of independent FFTs in array integer, intent (in) :: howmany !> If true, then take FFT of transpose of array logical, optional, intent(in) :: transposed #if FFT == _FFTW3_ integer, dimension(1) :: vector_sizes_real, vector_sizes_complex integer :: stride, dist_real, dist_complex ! we need two dummy arrays for the planner. Since at ! present we are not using SSE instructions, these are save ! to be local to this routine real, dimension(:,:), allocatable :: dummy_real_data complex, dimension (:,:), allocatable :: dummy_complex_data logical :: transposed_internal #endif call time_message(.false., time_fft, ' FFT') call fft%init(n, direction, howmany, .false.) #if FFT == _FFTW3_ transposed_internal = get_option_with_default(transposed, .false.) if (transposed_internal) then stride = howmany dist_real = 1 dist_complex = 1 allocate (dummy_real_data(max (1, howmany), N)) allocate (dummy_complex_data(max(1, howmany), N/2+1)) else stride = 1 dist_real = N dist_complex = N/2 + 1 allocate (dummy_real_data(N, max (1, howmany))) allocate (dummy_complex_data(N/2+1, max(1, howmany))) endif vector_sizes_real = N vector_sizes_complex = N/2+1 fft%plan = fftw_plan_many_dft_r2c(1, & vector_sizes_real, howmany, & dummy_real_data, vector_sizes_real, stride, dist_real, & dummy_complex_data, vector_sizes_complex, stride, dist_complex, & fftw_flags()) deallocate (dummy_real_data, dummy_complex_data) #else UNUSED_DUMMY(transposed) #endif call time_message(.false., time_fft, ' FFT') end subroutine init_rcfftw_1d !> Create FFTW plan suitable for 1D complex to real transformation subroutine init_crfftw_1d (fft, direction, n, howmany, transposed) use job_manage, only: time_message use optionals, only: get_option_with_default implicit none !> The created plan type (fft_type), intent (out) :: fft !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]] integer, intent (in) :: direction !> Number of points in array to transform integer, intent (in) :: n !> Number of independent FFTs in array integer, intent (in) :: howmany !> If true, then take FFT of transpose of array logical, optional, intent(in) :: transposed #if FFT == _FFTW3_ integer, dimension(1) :: vector_sizes_real, vector_sizes_complex integer :: stride, dist_real, dist_complex ! we need two dummy arrays for the planner. Since at ! present we are not using SSE instructions, these are save ! to be local to this routine real, dimension(:,:), allocatable :: dummy_real_data complex, dimension (:,:), allocatable :: dummy_complex_data logical :: transposed_internal #endif call time_message(.false., time_fft, ' FFT') call fft%init(n, direction, howmany, .false.) #if FFT == _FFTW3_ transposed_internal = get_option_with_default(transposed, .false.) if (transposed_internal) then stride = howmany dist_real = 1 dist_complex = 1 allocate (dummy_real_data(max (1, howmany), N)) allocate (dummy_complex_data(max(1, howmany), N/2+1)) else stride = 1 dist_real = N dist_complex = N/2 + 1 allocate (dummy_real_data(N, max (1, howmany))) allocate (dummy_complex_data(N/2+1, max(1, howmany))) endif vector_sizes_real = N vector_sizes_complex = N/2+1 fft%plan = fftw_plan_many_dft_c2r(1, & vector_sizes_real, howmany, & dummy_complex_data, vector_sizes_complex, stride, dist_complex, & dummy_real_data, vector_sizes_real, stride, dist_real, & fftw_flags()) deallocate (dummy_real_data, dummy_complex_data) #else UNUSED_DUMMY(transposed) #endif call time_message(.false., time_fft, ' FFT') end subroutine init_crfftw_1d !> Create FFTW plan suitable for 2D real to complex transformation subroutine init_rcfftw_2d (fft, direction, m, n, howmany, stride_) use job_manage, only: time_message use optionals, only: get_option_with_default implicit none !> The created plan type (fft_type), intent (out) :: fft !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]] integer, intent (in) :: direction !> Number of points in first dimension of array to transform integer, intent (in) :: m !> Number of points in second dimension of array to transform integer, intent (in) :: n !> Number of independent FFTs in array integer, intent (in) :: howmany !> Stride of array if different from `howmany` integer, optional, intent(in) :: stride_ #if FFT == _FFTW3_ integer, parameter :: fft_dimension = 2 integer :: stride ! to pass to FFTW3, we need vector for actual sizes of the real ! and complex data integer, dimension(fft_dimension) :: & vector_sizes_real, vector_sizes_complex ! we need two dummy arrays for the planner. Since at ! present we are not using SSE instructions, these are save ! to be local to this routine real, dimension(:,:), allocatable :: dummy_real_data complex, dimension (:,:), allocatable :: dummy_complex_data #endif call time_message(.false., time_fft, ' FFT') call fft%init(m * n, direction, howmany, .true.) #if FFT == _FFTW3_ stride = get_option_with_default(stride_, howmany) allocate (dummy_real_data(stride, m*n)) allocate (dummy_complex_data(stride, (m/2+1)*n)) vector_sizes_real(1) = m vector_sizes_real(2) = n vector_sizes_complex(1) = m/2+1 vector_sizes_complex(2) = n ! FFTW F03 requires us to pass the shape of the arrays in reverse order vector_sizes_real = vector_sizes_real(fft_dimension:1:-1) vector_sizes_complex = vector_sizes_complex(fft_dimension:1:-1) fft%plan = fftw_plan_many_dft_r2c(fft_dimension, & vector_sizes_real, howmany, & dummy_real_data, vector_sizes_real, stride, 1, & dummy_complex_data, vector_sizes_complex, stride, 1, & fftw_flags()) deallocate (dummy_real_data, dummy_complex_data) #else UNUSED_DUMMY(stride_) #endif call time_message(.false., time_fft, ' FFT') end subroutine init_rcfftw_2d !> Create FFTW plan suitable for 2D complex to real transformation subroutine init_crfftw_2d (fft, direction, m, n, howmany, stride_) use job_manage, only: time_message use optionals, only: get_option_with_default implicit none !> The created plan type (fft_type), intent (out) :: fft !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]] integer, intent (in) :: direction !> Number of points in first dimension of array to transform integer, intent (in) :: m !> Number of points in second dimension of array to transform integer, intent (in) :: n !> Number of independent FFTs in array integer, intent (in) :: howmany !> Stride of array if different from `howmany` integer, optional, intent(in) :: stride_ #if FFT == _FFTW3_ integer, parameter :: fft_dimension = 2 integer :: stride ! to pass to FFTW3, we need vector for actual sizes of the real ! and complex data integer, dimension(fft_dimension) :: & vector_sizes_real, vector_sizes_complex ! we need two dummy arrays for the planner. Since at ! present we are not using SSE instructions, these are save ! to be local to this routine real, dimension(:,:), allocatable :: dummy_real_data complex, dimension (:,:), allocatable :: dummy_complex_data #endif call time_message(.false., time_fft, ' FFT') call fft%init(m * n, direction, howmany, .true.) #if FFT == _FFTW3_ stride = get_option_with_default(stride_, howmany) allocate (dummy_real_data(stride, m*n)) allocate (dummy_complex_data(stride, (m/2+1)*n)) vector_sizes_real(1) = m vector_sizes_real(2) = n vector_sizes_complex(1) = m/2+1 vector_sizes_complex(2) = n ! FFTW F03 requires us to pass the shape of the arrays in reverse order vector_sizes_real = vector_sizes_real(fft_dimension:1:-1) vector_sizes_complex = vector_sizes_complex(fft_dimension:1:-1) fft%plan = fftw_plan_many_dft_c2r(fft_dimension, & vector_sizes_real, howmany, & dummy_complex_data, vector_sizes_complex, stride, 1, & dummy_real_data, vector_sizes_real, stride, 1, & fftw_flags()) deallocate (dummy_real_data, dummy_complex_data) #else UNUSED_DUMMY(stride_) #endif call time_message(.false., time_fft, ' FFT') end subroutine init_crfftw_2d !> Restore FFTW to pristine state subroutine finish_fft_work use job_manage, only: time_message implicit none # if FFT == _FFTW3_ call time_message(.false., time_fft, ' FFT') call fftw_cleanup call time_message(.false., time_fft, ' FFT') # endif end subroutine finish_fft_work !> Save FFTW wisdom to file !> !> See FFTW documentation for details of wisdom subroutine save_wisdom(filename) use job_manage, only: time_message #ifdef FFT use iso_c_binding, only: c_int, c_null_char integer(c_int) :: ret #endif character(*), intent(in) :: filename #ifdef FFT call time_message(.false., time_fft, ' FFT') ret = fftw_export_wisdom_to_filename(filename//c_null_char) call time_message(.false., time_fft, ' FFT') #else UNUSED_DUMMY(filename) #endif end subroutine save_wisdom !> Load FFTW wisdom from file !> !> See FFTW documentation for details of wisdom subroutine load_wisdom(filename) use job_manage, only: time_message #ifdef FFT use iso_c_binding, only: c_int, c_null_char integer(c_int) :: ret #endif character(*), intent(in) :: filename #ifdef FFT call time_message(.false., time_fft, ' FFT') ret = fftw_import_wisdom_from_filename(filename//c_null_char) call time_message(.false., time_fft, ' FFT') #else UNUSED_DUMMY(filename) #endif end subroutine load_wisdom end module fft_work