fft_work.fpp Source File


Contents

Source Code


Source Code

#include "define.inc"
#include "unused_dummy.inc"

#if FFT == _FFTW_
#error Support for FFTW2 has been removed, please switch to FFTW3
#endif

!> Provides some wrappers for managing FFTW plans.
!>
!> The initialisation routines ([[init_crfftw]], [[init_rcfftw]],
!> [[init_ccfftw]]) create FFTW plans, storing them in an instance of
!> [[fft_type]], along with some useful metadata, notably
!> [[fft_type:scale]].
module fft_work
! Modifications for using FFTW version 3:
! (c) The Numerical Algorithms Group (NAG) Ltd, 2009
!                                 on behalf of the HECToR project
  use, intrinsic:: iso_c_binding, only: C_PTR, C_NULL_PTR

#if FFT == _FFTW3_
#ifdef SINGLE_PRECISION
  use fft_wrapper, only: fftw_plan_many_dft => fftwf_plan_many_dft
  use fft_wrapper, only: fftw_plan_many_dft_c2r => fftwf_plan_many_dft_c2r
  use fft_wrapper, only: fftw_plan_many_dft_r2c => fftwf_plan_many_dft_r2c
  use fft_wrapper, only: fftw_init_threads => fftwf_init_threads
  use fft_wrapper, only: fftw_plan_with_nthreads => fftwf_plan_with_nthreads
  use fft_wrapper, only: fftw_destroy_plan => fftwf_destroy_plan
  use fft_wrapper, only: fftw_cleanup => fftwf_cleanup
  use fft_wrapper, only: fftw_execute_dft => fftwf_execute_dft
  use fft_wrapper, only: fftw_execute_dft_r2c => fftwf_execute_dft_r2c
  use fft_wrapper, only: fftw_execute_dft_c2r => fftwf_execute_dft_c2r
  use fft_wrapper, only: fftw_export_wisdom_to_filename => fftwf_export_wisdom_to_filename
  use fft_wrapper, only: fftw_import_wisdom_from_filename => fftwf_import_wisdom_from_filename  
#else
  use fft_wrapper, only: fftw_plan_many_dft
  use fft_wrapper, only: fftw_plan_many_dft_c2r
  use fft_wrapper, only: fftw_plan_many_dft_r2c
  use fft_wrapper, only: fftw_init_threads
  use fft_wrapper, only: fftw_plan_with_nthreads
  use fft_wrapper, only: fftw_destroy_plan
  use fft_wrapper, only: fftw_cleanup
  use fft_wrapper, only: fftw_execute_dft
  use fft_wrapper, only: fftw_execute_dft_r2c
  use fft_wrapper, only: fftw_execute_dft_c2r
  use fft_wrapper, only: fftw_export_wisdom_to_filename
  use fft_wrapper, only: fftw_import_wisdom_from_filename
#endif
  use fft_wrapper, only: FFTW_PATIENT, FFTW_ESTIMATE, FFTW_UNALIGNED
#endif

  !These are always defined
  use fft_wrapper, only: FFT_TO_SPECTRAL_SPACE => FFTW_FORWARD
  use fft_wrapper, only: FFT_TO_REAL_SPACE => FFTW_BACKWARD

  implicit none

  private

  public :: measure_plan, finish_fft_work, time_fft, fft_type
  public :: init_ccfftw, init_crfftw, init_rcfftw, init_z
  public :: FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
  public :: save_wisdom, load_wisdom

  interface init_crfftw
     module procedure init_crfftw_1d
     module procedure init_crfftw_2d
  end interface

  interface init_rcfftw
     module procedure init_rcfftw_1d
     module procedure init_rcfftw_2d
  end interface

  !> If true, then use FFTW_PATIENT to pick the optimal FFT algorithm,
  !> otherwise use FFTW_ESTIMATE (potentially slower but more
  !> reproducible results). FFTW_PATIENT takes longer to set up the
  !> plan initially, but may be faster overall.
  logical :: measure_plan = .true.

  !> Information about a planned FFT transform
  type :: fft_type
     !> True if the plan has been created
     logical :: created = .false.
     !> Number of points in the array to be transformed
     integer :: n = 0
     !> Direction of the FFT, can be either [[FFT_FORWARD]] or [[FFTW_BACKWARD]]
     integer :: direction = 0
     !> The FFTW plan object
     type(C_PTR) :: plan = C_NULL_PTR
     !> How many independent slices in the array to be transformed
     integer :: howmany = 0
     !> True if the array is strided or 2D
     logical :: strided = .false.
#ifdef SHMEM
     !> start indices for rank partition in first 2 dims
     integer :: ts1 = -1, ts2 = -1
#endif
     !> Normalisation factor
     real :: scale = 0
     !> If true, then the result of the FFT needs to be multiplied by
     !> [[fft_type:scale]]. If false, then the scaling can be skipped,
     !> saving some work
     logical :: must_scale = .false.
   contains
     procedure, public :: execute_c2c, execute_r2c, execute_c2r
     ! Unfortunately we can't overload the execute_* routines due to the
     ! assumed shape which doesn't seem to carry through in interfaces.
     ! Otherwise generic :: execute => execute_c2c, execute_c2r, execute_r2c
     procedure, public :: delete => delete_fft
     procedure, private :: init => basic_init
  end type fft_type

  !> Total time taken setting up and performing FFTs
  real :: time_fft(2) = 0.

  !> True if this module has been initialised
  logical :: initialised = .false.
contains

  !> Small wrapper to bind execute c2c to fft_type
  subroutine execute_c2c(self, array_in, array_out)
    class(fft_type), intent(in) :: self
    complex, dimension(*), intent(in out) :: array_in
    complex, dimension(*), intent(out) :: array_out
#if FFT == _FFTW3_
    call fftw_execute_dft(self%plan, array_in, array_out)
#else
    UNUSED_DUMMY(self)
    !Can't do UNUSED_DUMMY on assumed shape arrays
    if (.false.) write(*,*) array_out(1), array_in(1)
#endif
  end subroutine execute_c2c

  !> Small wrapper to bind execute r2c to fft_type
  subroutine execute_r2c(self, array_in, array_out)
    class(fft_type), intent(in) :: self
    real, dimension(*), intent(in out) :: array_in
    complex, dimension(*), intent(out) :: array_out
#if FFT == _FFTW3_
    call fftw_execute_dft_r2c(self%plan, array_in, array_out)
#else
    UNUSED_DUMMY(self)
    if (.false.) write(*,*) array_out(1), array_in(1)
#endif
  end subroutine execute_r2c

  !> Small wrapper to bind execute c2r to fft_type
  subroutine execute_c2r(self, array_in, array_out)
    class(fft_type), intent(in) :: self
    complex, dimension(*), intent(in out) :: array_in
    real, dimension(*), intent(out) :: array_out
#if FFT == _FFTW3_
    call fftw_execute_dft_c2r(self%plan, array_in, array_out)
#else
    UNUSED_DUMMY(self)
    if (.false.) write(*,*) array_out(1), array_in(1)
#endif
  end subroutine execute_c2r

  !> Destroy FFT plan
  !>
  !> This is safe to call on previously destroyed or
  !> non-constructed/empty plans
  subroutine delete_fft(self)
    use job_manage, only: time_message
    implicit none
    class(fft_type), intent(in out) :: self
    !Don't try to delete ffts which have not been created
    if (.not. self%created) return
    call time_message(.false., time_fft, ' FFT')
#if FFT == _FFTW3_
    call fftw_destroy_plan(self%plan)
# endif
    select type (self)
    type is(fft_type)
       self = fft_type()
    end select
    call time_message(.false., time_fft, ' FFT')
  end subroutine delete_fft

  !> Perform any one off setup of the fft library.
  !> This is currently mostly only needed for builds with
  !> OpenMP for threaded transforms.
  subroutine init_fft_work
#if FFT == _FFTW3_
#ifdef OPENMP
    use omp_lib, only: omp_get_num_threads
    implicit none
    integer :: ierr, nthreads
    if (initialised) return
    initialised = .true.
    !$OMP PARALLEL
    nthreads = omp_get_num_threads()
    !$OMP END PARALLEL
    ierr = fftw_init_threads()
    call fftw_plan_with_nthreads(nthreads)
#endif
#endif
  end subroutine init_fft_work

  !> Does some common initialisation for [[fft_type]]
  subroutine basic_init(self, n, direction, howmany, strided)
    use mp, only: mp_abort
    use print_utils, only: to_string
    class(fft_type), intent(in out) :: self
    !> Number of points
    integer, intent(in) :: n
    !> Direction of FFT, must be either [[FFT_TO_REAL_SPACE]] or [[FFT_TO_SPECTRAL_SPACE]]
    integer, intent(in) :: direction
    !> Number of independent FFTs
    integer, intent(in) :: howmany
    !> Is array strided/2D
    logical, intent(in) :: strided

    if (.not. initialised) call init_fft_work

    self%n = n
    self%direction = direction
    if (direction == FFT_TO_SPECTRAL_SPACE) then
       self%scale = 1 / real(n)
       self%must_scale = .true.
    else if (direction == FFT_TO_REAL_SPACE) then
       self%scale = 1
       self%must_scale = .false.
    else
       call mp_abort(&
            "Unexpected FFT direction, expected one of FFT_TO_REAL_SPACE, " &
            // "FFT_TO_SPECTRAL_SPACE, got " // to_string(direction))
    end if
    self%howmany = howmany
    ! Note we don't use strided for anything
    self%strided = strided
    ! Note we haven't actually created the plan yet, so this is perhaps a bit
    ! misleading
    self%created = .true.
  end subroutine basic_init

# if FFT == _FFTW3_
  !> Set flags for FFTW3
  pure integer function fftw_flags()
    if (measure_plan) then
       fftw_flags = FFTW_PATIENT + FFTW_UNALIGNED
    else
       fftw_flags = FFTW_ESTIMATE + FFTW_UNALIGNED
    endif
  end function fftw_flags
#endif

  !> FIXME : Add documentation
  subroutine init_z (fft, direction, n, howmany)
    use job_manage, only: time_message
    implicit none
    type (fft_type), intent (out) :: fft
    integer, intent (in) :: direction, n
    integer, intent (in) :: howmany
# if FFT == _FFTW3_
    complex, dimension (:,:), allocatable :: dummy_in_data, dummy_out_data
    integer, dimension (1) :: array_n, embed
# endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(n, direction, howmany, .false.)
#if FFT == _FFTW3_
    array_n = n
    embed   = n+1

    allocate (dummy_in_data(n+1, howmany), dummy_out_data(n+1, howmany))

    fft%plan = fftw_plan_many_dft(1, array_n, howmany, &
         dummy_in_data,  embed, 1, n+1, &
         dummy_out_data, embed, 1, n+1, &
         direction, fftw_flags())

    deallocate (dummy_in_data, dummy_out_data)
# endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_z

  !> Create an FFTW plan for complex-to-complex transforms
  subroutine init_ccfftw (fft, direction, n, howmany, data_array, transpose, m)
    use mp, only: mp_abort
    use job_manage, only: time_message
    use optionals, only: get_option_with_default
    implicit none
    !> The created plan
    type (fft_type), intent (out) :: fft
    !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]]
    integer, intent (in) :: direction
    !> Number of points in array to transform
    integer, intent (in) :: n
    !> Number of independent FFTs in array
    integer, intent (in) :: howmany
    !> Example array to transform
    complex, dimension(:,:), intent(inout) :: data_array
    !> If true, then take FFT of transpose of array
    logical, optional, intent(in) :: transpose
    !> Size of transpose direction. Required if `transpose` is true
    integer, optional, intent(in) :: m
# if FFT == _FFTW3_
    integer, dimension(1) :: array_n
    integer :: stride, dist
# endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(n, direction, howmany, .false.)
#if FFT == _FFTW3_
    ! the planer expects this as an array of size 1
    array_n = n

    if (get_option_with_default(transpose, .false.)) then
       if (.not. present(m)) call mp_abort("Missing argument 'm' for transpose FFT")
       stride = howmany * m
       dist = 1
    else
       stride = 1
       dist = n
    end if

    fft%plan = fftw_plan_many_dft(1, array_n, howmany, &
         data_array, array_n, stride, dist, &
         data_array, array_n, stride, dist, &
         direction, fftw_flags())
#else
    UNUSED_DUMMY(data_array); UNUSED_DUMMY(transpose); UNUSED_DUMMY(m)
# endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_ccfftw

  !> Create FFTW plan suitable for 1D real to complex transformation
  subroutine init_rcfftw_1d (fft, direction, n, howmany, transposed)
    use job_manage, only: time_message
    use optionals, only: get_option_with_default
    implicit none
    !> The created plan
    type (fft_type), intent (out) :: fft
    !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]]
    integer, intent (in) :: direction
    !> Number of points in array to transform
    integer, intent (in) :: n
    !> Number of independent FFTs in array
    integer, intent (in) :: howmany
    !> If true, then take FFT of transpose of array
    logical, optional, intent(in) :: transposed

#if FFT == _FFTW3_
    integer, dimension(1) :: vector_sizes_real, vector_sizes_complex
    integer :: stride, dist_real, dist_complex
    ! we need two dummy arrays for the planner.  Since at
    ! present we are not using SSE instructions, these are save
    ! to be local to this routine
    real, dimension(:,:), allocatable :: dummy_real_data
    complex, dimension (:,:), allocatable :: dummy_complex_data
    logical :: transposed_internal
#endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(n, direction, howmany, .false.)
#if FFT == _FFTW3_
    transposed_internal = get_option_with_default(transposed, .false.)
    if (transposed_internal) then
       stride = howmany
       dist_real = 1
       dist_complex = 1
       allocate (dummy_real_data(max (1, howmany), N))
       allocate (dummy_complex_data(max(1, howmany), N/2+1))
    else
       stride = 1
       dist_real = N
       dist_complex = N/2 + 1
       allocate (dummy_real_data(N, max (1, howmany)))
       allocate (dummy_complex_data(N/2+1, max(1, howmany)))
    endif

    vector_sizes_real = N
    vector_sizes_complex = N/2+1

    fft%plan = fftw_plan_many_dft_r2c(1, &
         vector_sizes_real, howmany, &
         dummy_real_data,    vector_sizes_real,    stride, dist_real,     &
         dummy_complex_data, vector_sizes_complex, stride, dist_complex, &
         fftw_flags())

    deallocate (dummy_real_data, dummy_complex_data)
#else
    UNUSED_DUMMY(transposed)
#endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_rcfftw_1d

  !> Create FFTW plan suitable for 1D complex to real transformation
  subroutine init_crfftw_1d (fft, direction, n, howmany, transposed)
    use job_manage, only: time_message
    use optionals, only: get_option_with_default
    implicit none
    !> The created plan
    type (fft_type), intent (out) :: fft
    !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]]
    integer, intent (in) :: direction
    !> Number of points in array to transform
    integer, intent (in) :: n
    !> Number of independent FFTs in array
    integer, intent (in) :: howmany
    !> If true, then take FFT of transpose of array
    logical, optional, intent(in) :: transposed

#if FFT == _FFTW3_
    integer, dimension(1) :: vector_sizes_real, vector_sizes_complex
    integer :: stride, dist_real, dist_complex
    ! we need two dummy arrays for the planner.  Since at
    ! present we are not using SSE instructions, these are save
    ! to be local to this routine
    real, dimension(:,:), allocatable :: dummy_real_data
    complex, dimension (:,:), allocatable :: dummy_complex_data
    logical :: transposed_internal
#endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(n, direction, howmany, .false.)
#if FFT == _FFTW3_
    transposed_internal = get_option_with_default(transposed, .false.)
    if (transposed_internal) then
       stride = howmany
       dist_real = 1
       dist_complex = 1
       allocate (dummy_real_data(max (1, howmany), N))
       allocate (dummy_complex_data(max(1, howmany), N/2+1))
    else
       stride = 1
       dist_real = N
       dist_complex = N/2 + 1
       allocate (dummy_real_data(N, max (1, howmany)))
       allocate (dummy_complex_data(N/2+1, max(1, howmany)))
    endif

    vector_sizes_real = N
    vector_sizes_complex = N/2+1

    fft%plan = fftw_plan_many_dft_c2r(1, &
         vector_sizes_real, howmany, &
         dummy_complex_data, vector_sizes_complex, stride, dist_complex, &
         dummy_real_data,    vector_sizes_real,    stride, dist_real,     &
         fftw_flags())

    deallocate (dummy_real_data, dummy_complex_data)
#else
    UNUSED_DUMMY(transposed)
#endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_crfftw_1d

  !> Create FFTW plan suitable for 2D real to complex transformation
  subroutine init_rcfftw_2d (fft, direction, m, n, howmany, stride_)
    use job_manage, only: time_message
    use optionals, only: get_option_with_default
    implicit none
    !> The created plan
    type (fft_type), intent (out) :: fft
    !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]]
    integer, intent (in) :: direction
    !> Number of points in first dimension of array to transform
    integer, intent (in) :: m
    !> Number of points in second dimension of array to transform
    integer, intent (in) :: n
    !> Number of independent FFTs in array
    integer, intent (in) :: howmany
    !> Stride of array if different from `howmany`
    integer, optional, intent(in) :: stride_

#if FFT == _FFTW3_
    integer, parameter :: fft_dimension = 2
    integer  :: stride

    ! to pass to FFTW3, we need vector for actual sizes of the real
    ! and complex data
    integer, dimension(fft_dimension) :: &
         vector_sizes_real, vector_sizes_complex

    ! we need two dummy arrays for the planner.  Since at
    ! present we are not using SSE instructions, these are save
    ! to be local to this routine
    real, dimension(:,:), allocatable :: dummy_real_data
    complex, dimension (:,:), allocatable :: dummy_complex_data
#endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(m * n, direction, howmany, .true.)
#if FFT == _FFTW3_
    stride = get_option_with_default(stride_, howmany)

    allocate (dummy_real_data(stride, m*n))
    allocate (dummy_complex_data(stride, (m/2+1)*n))

    vector_sizes_real(1) = m
    vector_sizes_real(2) = n

    vector_sizes_complex(1) = m/2+1
    vector_sizes_complex(2) = n

    ! FFTW F03 requires us to pass the shape of the arrays in reverse order
    vector_sizes_real = vector_sizes_real(fft_dimension:1:-1)
    vector_sizes_complex = vector_sizes_complex(fft_dimension:1:-1)

    fft%plan = fftw_plan_many_dft_r2c(fft_dimension, &
         vector_sizes_real, howmany, &
         dummy_real_data, vector_sizes_real, stride, 1, &
         dummy_complex_data, vector_sizes_complex, stride, 1, &
         fftw_flags())

    deallocate (dummy_real_data, dummy_complex_data)
#else
    UNUSED_DUMMY(stride_)
#endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_rcfftw_2d

  !> Create FFTW plan suitable for 2D complex to real transformation
  subroutine init_crfftw_2d (fft, direction, m, n, howmany, stride_)
    use job_manage, only: time_message
    use optionals, only: get_option_with_default
    implicit none
    !> The created plan
    type (fft_type), intent (out) :: fft
    !> Direction of the transformation, either [[FFT_TO_SPECTRAL_SPACE]] or [[FFT_TO_REAL_SPACE]]
    integer, intent (in) :: direction
    !> Number of points in first dimension of array to transform
    integer, intent (in) :: m
    !> Number of points in second dimension of array to transform
    integer, intent (in) :: n
    !> Number of independent FFTs in array
    integer, intent (in) :: howmany
    !> Stride of array if different from `howmany`
    integer, optional, intent(in) :: stride_

#if FFT == _FFTW3_
    integer, parameter :: fft_dimension = 2
    integer  :: stride

    ! to pass to FFTW3, we need vector for actual sizes of the real
    ! and complex data
    integer, dimension(fft_dimension) :: &
         vector_sizes_real, vector_sizes_complex

    ! we need two dummy arrays for the planner.  Since at
    ! present we are not using SSE instructions, these are save
    ! to be local to this routine
    real, dimension(:,:), allocatable :: dummy_real_data
    complex, dimension (:,:), allocatable :: dummy_complex_data
#endif
    call time_message(.false., time_fft, ' FFT')
    call fft%init(m * n, direction, howmany, .true.)
#if FFT == _FFTW3_
    stride = get_option_with_default(stride_, howmany)

    allocate (dummy_real_data(stride, m*n))
    allocate (dummy_complex_data(stride, (m/2+1)*n))

    vector_sizes_real(1) = m
    vector_sizes_real(2) = n

    vector_sizes_complex(1) = m/2+1
    vector_sizes_complex(2) = n

    ! FFTW F03 requires us to pass the shape of the arrays in reverse order
    vector_sizes_real = vector_sizes_real(fft_dimension:1:-1)
    vector_sizes_complex = vector_sizes_complex(fft_dimension:1:-1)

    fft%plan = fftw_plan_many_dft_c2r(fft_dimension, &
         vector_sizes_real, howmany, &
         dummy_complex_data, vector_sizes_complex, stride, 1, &
         dummy_real_data, vector_sizes_real, stride, 1, &
         fftw_flags())

    deallocate (dummy_real_data, dummy_complex_data)
#else
    UNUSED_DUMMY(stride_)
#endif
    call time_message(.false., time_fft, ' FFT')
  end subroutine init_crfftw_2d

  !> Restore FFTW to pristine state
  subroutine finish_fft_work
    use job_manage, only: time_message
    implicit none
# if FFT == _FFTW3_
    call time_message(.false., time_fft, ' FFT')
    call fftw_cleanup
    call time_message(.false., time_fft, ' FFT')
# endif
  end subroutine finish_fft_work

  !> Save FFTW wisdom to file
  !>
  !> See FFTW documentation for details of wisdom
  subroutine save_wisdom(filename)
    use job_manage, only: time_message
#ifdef FFT
    use iso_c_binding, only: c_int, c_null_char
    integer(c_int) :: ret
#endif
    character(*), intent(in) :: filename
#ifdef FFT
    call time_message(.false., time_fft, ' FFT')
    ret = fftw_export_wisdom_to_filename(filename//c_null_char)
    call time_message(.false., time_fft, ' FFT')
#else
    UNUSED_DUMMY(filename)
#endif
  end subroutine save_wisdom

  !> Load FFTW wisdom from file
  !>
  !> See FFTW documentation for details of wisdom
  subroutine load_wisdom(filename)
    use job_manage, only: time_message
#ifdef FFT
    use iso_c_binding, only: c_int, c_null_char
    integer(c_int) :: ret
#endif
    character(*), intent(in) :: filename
#ifdef FFT
    call time_message(.false., time_fft, ' FFT')
    ret = fftw_import_wisdom_from_filename(filename//c_null_char)
    call time_message(.false., time_fft, ' FFT')
#else
    UNUSED_DUMMY(filename)
#endif
  end subroutine load_wisdom
end module fft_work