fields_implicit.f90 Source File


Source Code

!> Implements the implicit field solve used when field_option = 'implicit'.
!> A field response matrix is constructed and inverted during
!> initialisation; the stored inverse (aminv) is then applied every
!> timestep to advance phi, apar and bpar.
module fields_implicit
  implicit none

  private
 
  public :: init_fields_implicit
  public :: advance_implicit
  public :: remove_zonal_flows
  public :: init_allfields_implicit
  public :: reset_init
  public :: field_subgath, dump_response, read_response
  public :: dump_response_to_file_imp

!///////////////////////////////////////////////////////
!// DERIVED TYPES FOR FIELD MATRIX REPRESENTATION
!///////////////////////////////////////////////////////

  !////////////////////////////////////////////////////////////////
  !// DCELL : 
  ! Within each supercell, there are N_class primary cells.  Each 
  ! has (2*ntgrid+1)*nfield points.
  !> Holds one supercell's worth of inverse response matrix data
  !> belonging to a single distributed cell.
  type dcell_type
     complex, dimension(:), allocatable :: supercell
  end type dcell_type
  !----------------------------------------------------------------

  ! Within each class, there may be multiple supercells.

  ! The number of supercells in each class is M_class.
  
  ! When aminv is laid out over PEs, the supercells of each class 
  ! are distributed -- thus, "dcells"
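
  ! As an illustrative example (hypothetical numbers): with nfield = 2
  ! fields and ntgrid = 16, each primary cell carries (2*16+1)*2 = 66
  ! points, so a supercell of N_class = 3 cells stores 3*66 = 198 complex
  ! values in its dcell.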
  
  !////////////////////////////////////////////////////////////////
  !// FIELD_MATRIX_TYPE :
  !> A collection of dcells: the distributed supercells belonging to
  !> one class of connected domains.
  type :: field_matrix_type
     type(dcell_type), dimension (:), allocatable :: dcell
  end type field_matrix_type
  !----------------------------------------------------------------

  !////////////////////////////////////////////////////////////////
  !// AMINV :
  ! There may be supercells of different sizes or "classes".  
  type (field_matrix_type), dimension (:), allocatable :: aminv
  !----------------------------------------------------------------
!-------------------------------------------------------

  !> A variable to help with running benchmarks... do not set true
  !! unless you know what you are doing. If true, the response matrix
  !! will not be initialised; it is simply left set to zero, and the
  !! results of any simulation will be garbage.
  logical, public :: skip_initialisation = .false.

  integer :: nfield, nidx
  logical :: initialized = .false.
  logical :: field_subgath
  logical :: dump_response=.false., read_response=.false.
  integer, dimension(:), allocatable :: recvcnts, displs
contains
  !> Initialise the implicit fields module: set up the layouts and grids
  !> it depends on, then build the response matrix and initialise the
  !> antenna.
  subroutine init_fields_implicit
    use antenna, only: init_antenna
    use theta_grid, only: init_theta_grid
    use kt_grids, only: init_kt_grids
    use gs2_layouts, only: init_gs2_layouts
    use run_parameters, only: has_phi, has_apar, has_bpar
    use unit_tests, only: should_print
    use mp, only: mp_abort
    implicit none
    logical :: debug

    if (initialized) return
    initialized = .true.

    debug = should_print(3)

    !Check we have at least one field. If not abort.
    !Note, we do this here rather than as soon as we read the input file
    !as other field types may support operation with no fields (e.g. 'local' does)
    if(.not. (has_phi .or. has_apar .or. has_bpar)) then
       call mp_abort("Field_option='implicit' requires at least one field is non-zero",.true.)
    endif

    if (debug) write(6,*) "init_fields_implicit: gs2_layouts"
    call init_gs2_layouts
    if (debug) write(6,*) "init_fields_implicit: theta_grid"
    call init_theta_grid
    if (debug) write(6,*) "init_fields_implicit: kt_grids"
    call init_kt_grids
    if (debug) write(6,*) "init_fields_implicit: response_matrix"
    call init_response_matrix
    if (debug) write(6,*) "init_fields_implicit: antenna"
    call init_antenna
  end subroutine init_fields_implicit

  !> Initialise all the field arrays, either from the initial
  !> distribution function via get_init_field or by solving the field
  !> equations with getfield.
  subroutine init_allfields_implicit(gf_lo)
    use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew
    use dist_fn_arrays, only: g, gnew
    use dist_fn, only: get_init_field
    use init_g, only: new_field_init
    use array_utils, only: copy
    use optionals, only: get_option_with_default
    implicit none

    logical, intent(in), optional :: gf_lo
    logical :: local_gf_lo

    local_gf_lo = get_option_with_default(gf_lo, .false.)

    ! MAB> new field init option ported from agk
    if(local_gf_lo) then
       call get_init_field (phinew, aparnew, bparnew, .true.)
       !AJ Note, we do not initialise phi, apar, and bpar here 
       !AJ as this is now done in fields_gf_local along with a 
       !AJ fields redistribute.
       call copy(gnew, g)
    else if (new_field_init) then
       call get_init_field (phinew, aparnew, bparnew)
       call copy(phinew, phi); call copy(aparnew, apar); call copy(bparnew, bpar)
       call copy(gnew, g)
    else
       call getfield (phinew, aparnew, bparnew)
       call copy(phinew, phi); call copy(aparnew, apar); call copy(bparnew, bpar)
    end if
    ! <MAB

  end subroutine init_allfields_implicit

  !> Evaluate the field equations for the given fields and pack the
  !> results for each active field contiguously into fl.
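  !>
  !> Packing sketch (assuming, for illustration, that only phi and bpar
  !> are active): fl(1:2*ntgrid+1,:,:) would hold fieldeq and
  !> fl(2*ntgrid+2:2*(2*ntgrid+1),:,:) would hold fieldeqp.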
  subroutine get_field_vector (fl, phi, apar, bpar)
    use theta_grid, only: ntgrid
    use dist_fn, only: getfieldeq
    use dist_fn_arrays, only: fieldeq, fieldeqa, fieldeqp
    use run_parameters, only: has_phi, has_apar, has_bpar
    implicit none
    complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar
    complex, dimension (:,:,:), intent (out) :: fl
    integer :: istart, ifin

    call getfieldeq (phi, apar, bpar, fieldeq, fieldeqa, fieldeqp)

    ifin = 0

    if (has_phi) then
       istart = ifin + 1
       ifin = (istart-1) + 2*ntgrid+1
       fl(istart:ifin,:,:) = fieldeq
    end if

    if (has_apar) then
       istart = ifin + 1
       ifin = (istart-1) + 2*ntgrid+1
       fl(istart:ifin,:,:) = fieldeqa
    end if

    if (has_bpar) then
       istart = ifin + 1
       ifin = (istart-1) + 2*ntgrid+1
       fl(istart:ifin,:,:) = fieldeqp
    end if

  end subroutine get_field_vector

  !> Unpack the flat solution vector u into the phinew, aparnew and
  !> bparnew field arrays using the jf_lo layout indexing.
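  !>
  !> This is the inverse of the packing in get_field_vector: for each
  !> active field ij_idx gives the flat offset of the ig = -ntgrid point
  !> and the following 2*ntgrid entries cover the rest of the theta range.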
  subroutine get_field_solution (u)
    use fields_arrays, only: phinew, aparnew, bparnew
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: jf_lo, ij_idx
    implicit none
    complex, dimension (0:), intent (in) :: u
    integer :: ik, it, ifield, ll, lr

    ifield = 0

    if (has_phi) then
       ifield = ifield + 1
       do ik = 1, naky
          do it = 1, ntheta0
             ll = ij_idx (jf_lo, -ntgrid, ifield, ik, it)
             lr = ll + 2*ntgrid
             phinew(:,it,ik) = u(ll:lr)
          end do
       end do
    endif

    if (has_apar) then
       ifield = ifield + 1
       do ik = 1, naky
          do it = 1, ntheta0
             ll = ij_idx (jf_lo, -ntgrid, ifield, ik, it)
             lr = ll + 2*ntgrid
             aparnew(:,it,ik) = u(ll:lr)
          end do
       end do
    endif

    if (has_bpar) then
       ifield = ifield + 1
       do ik = 1, naky
          do it = 1, ntheta0
             ll = ij_idx (jf_lo, -ntgrid, ifield, ik, it)
             lr = ll + 2*ntgrid
             bparnew(:,it,ik) = u(ll:lr)
          end do
       end do
    endif

  end subroutine get_field_solution

  !> Compute the field update for the given fields: evaluate the
  !> field-equation sources with get_field_vector, apply the stored
  !> inverse response matrix (u = aminv*fl) in a distributed fashion,
  !> and unpack the result with get_field_solution.
  subroutine getfield (phi, apar, bpar)
    use kt_grids, only: naky, ntheta0
    use gs2_layouts, only: f_lo, jf_lo
    use theta_grid, only: ntgrid
    use dist_fn, only: N_class
    use mp, only: sum_allreduce, allgatherv, iproc, nproc
    implicit none
    complex, dimension (-ntgrid:,:,:), intent (in) :: phi, apar, bpar
    complex, dimension (:,:,:), allocatable :: fl
    complex, dimension (:), allocatable :: u
    complex, dimension (:), allocatable :: u_small
    integer :: jflo, ik, it, nl, nr, i, m, n, dc

    allocate (fl(nidx, ntheta0, naky))

    !On first call to this routine setup the receive counts (recvcnts)
    !and displacement arrays (displs)
    if ((.not.allocated(recvcnts)).and.field_subgath) then
       allocate(recvcnts(nproc),displs(nproc)) !These are deallocated in reset_init
       do i=0,nproc-1
          displs(i+1)=MIN(i*jf_lo%blocksize,jf_lo%ulim_world+1) !This will assign a displacement outside the array for procs with no data
          recvcnts(i+1)=MIN(jf_lo%blocksize,jf_lo%ulim_world-displs(i+1)+1) !This ensures that we expect no data from procs without any
       enddo
    endif
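
    !As an illustrative example (hypothetical numbers): with 10 points in
    !jf_lo (ulim_world = 9), blocksize = 3 and nproc = 4 the loop above
    !gives displs = (/0, 3, 6, 9/) and recvcnts = (/3, 3, 3, 1/).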

    ! am*u = fl, Poisson's and Ampere's law, u is phi, apar, bpar 
    ! u = aminv*fl
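
    ! Schematically (a sketch of the distributed multiply done below, not
    ! executable code):
    !   u(jflo) = - sum_k aminv(jflo,k) * fl(k)
    ! where each processor accumulates only its local jflo rows before the
    ! final gather/reduce.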

    call get_field_vector (fl, phi, apar, bpar)

    !Initialise array, if not gathering then have to zero entire array
    if(field_subgath) then
       allocate(u_small(jf_lo%llim_proc:jf_lo%ulim_proc))
    else
       allocate(u_small(0:nidx*ntheta0*naky-1))
    endif
    u_small=0.

    !Should this really be to ulim_alloc instead?
    do jflo = jf_lo%llim_proc, jf_lo%ulim_proc
       
       !Class index
       i =  jf_lo%ij(jflo)
       
       !Class member index (i.e. which member of the class)
       m = jf_lo%mj(jflo)

       !Get ik index
       ik = f_lo(i)%ik(m,1)  ! For fixed i and m, ik does not change as n varies 

       !Get d(istributed) cell index
       dc =  jf_lo%dj(i,jflo)
       
       !Loop over cells in class (these are the 2pi domains in flux tube/box mode)
       do n = 1, N_class(i)
          
          !Get it index
          it = f_lo(i)%it(m,n)
          
          !Get extent of current cell in extended/ballooning space domain
          nl = 1 + nidx*(n-1)
          nr = nl + nidx - 1
          
          !Perform section of matrix vector multiplication
          u_small(jflo)=u_small(jflo)-sum(aminv(i)%dcell(dc)%supercell(nl:nr)*fl(:, it, ik)) 
          
       end do
    end do

    !Free memory
    deallocate (fl)

    !Gather/reduce the remaining data
    if(field_subgath) then
       allocate (u (0:nidx*ntheta0*naky-1))
       call allgatherv(u_small,recvcnts(iproc+1),u,recvcnts,displs)
       deallocate(u_small)
    else
       call sum_allreduce(u_small)
    endif

    !Reshape data into field arrays and free memory
    if(field_subgath)then
       call get_field_solution (u)
       deallocate(u)
    else
       call get_field_solution (u_small)
       deallocate(u_small)
    endif

  end subroutine getfield

  !> Advance the fields and distribution function by one timestep of the
  !> implicit algorithm, including the antenna drive, ExB shear and
  !> optional zonal flow removal.
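  !>
  !> In outline (as implemented below): copy the "new" fields and
  !> distribution function into the "old" arrays, advance with timeadv,
  !> solve for the field increments with getfield and add them on, then
  !> call timeadv again with diagnostics enabled.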
  subroutine advance_implicit (istep, remove_zonal_flows_switch)
    use run_parameters, only: reset
    use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew
    use fields_arrays, only: apar_ext, time_field, time_field_mpi
    use antenna, only: antenna_amplitudes, no_driver
    use dist_fn, only: timeadv, exb_shear, collisions_advance
    use dist_fn_arrays, only: g, gnew, kx_shift, theta0_shift
    use unit_tests, only: debug_message
    use job_manage, only: time_message
    use mp, only: get_mp_times
    use array_utils, only: copy
    implicit none
    integer, parameter :: diagnostics = 1
    real :: mp_total, mp_total_after
    integer, intent (in) :: istep
    logical, intent (in) :: remove_zonal_flows_switch

    !GGH NOTE: apar_ext is initialized in this call
    if(.not.no_driver) call antenna_amplitudes (apar_ext)
       
    if (allocated(kx_shift) .or. allocated(theta0_shift)) call exb_shear (gnew, phinew, aparnew, bparnew, istep) 
    
    call copy(gnew, g)
    call copy(phinew, phi); call copy(aparnew, apar); call copy(bparnew, bpar)
    
    call debug_message(4, 'fields_implicit::advance_implicit calling timeadv 1')
    call timeadv (phi, apar, bpar, phinew, aparnew, bparnew, istep)
    call debug_message(4, 'fields_implicit::advance_implicit called timeadv 1')
    if(reset) return !Return if resetting

    if(.not.no_driver) aparnew = aparnew + apar_ext 
    
    call debug_message(4, 'fields_implicit::advance_implicit calling getfield')

    call time_message(.false.,time_field,' Field Solver')
    call get_mp_times(total_time = mp_total)

    call getfield (phinew, aparnew, bparnew)

    phinew   = phinew  + phi
    aparnew  = aparnew + apar
    bparnew  = bparnew + bpar

    call time_message(.false.,time_field,' Field Solver')
    call get_mp_times(total_time = mp_total_after)
    time_field_mpi = time_field_mpi + (mp_total_after - mp_total)

    if (remove_zonal_flows_switch) call remove_zonal_flows
    
    call debug_message(4, 'fields_implicit::advance_implicit calling timeadv')
    call timeadv (phi, apar, bpar, phinew, aparnew, bparnew, istep, diagnostics)
    call debug_message(4, 'fields_implicit::advance_implicit called timeadv')

    ! Advance collisions, if separate from timeadv
    call collisions_advance (phi, bpar, phinew, aparnew, bparnew, diagnostics)

  end subroutine advance_implicit

  !> Remove the zonal flows by subtracting the field line average of
  !> phinew from phinew.
  subroutine remove_zonal_flows
    use fields_arrays, only: phinew
    use theta_grid, only: ntgrid
    use kt_grids, only: ntheta0, naky
    implicit none
    complex, dimension(:,:,:), allocatable :: phi_avg

    allocate(phi_avg(-ntgrid:ntgrid,ntheta0,naky))
    ! fieldline_average_phi will calculate the field line average of phinew and
    ! put it into phi_avg, but only for ik = 1 (the last parameter of the call)
    call fieldline_average_phi(phinew, phi_avg, 1)
    phinew = phinew - phi_avg
    deallocate(phi_avg)
  end subroutine remove_zonal_flows

  !> This generates a field line average of phi_in and writes it to
  !> phi_average. If ik_only is supplied, it will only calculate the
  !> field line average for that ky, leaving the rest of phi_avg unchanged. EGH
  !>
  !> It replaces the routines fieldlineavgphi_loc and fieldlineavgphi_tot,
  !> in fields.f90, which I think are defunct, as phi is always on every processor.
  !>
  !> @note This calculation is fairly generic so should perhaps be moved out into
  !> a more accessible module to allow reuse (or replaced with such a method).
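  !>
  !> Schematically (the exact weights are delegated to
  !> theta_grid::field_line_average, assumed to implement a weighted
  !> average along theta):
  !>   phi_average(it,ik) = sum_ig w(ig)*phi_in(ig,it,ik) / sum_ig w(ig)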
  subroutine fieldline_average_phi (phi_in, phi_average, ik_only)
    use theta_grid, only: ntgrid, field_line_average
    use kt_grids, only: ntheta0, naky
    use optionals, only: get_option_with_default
    implicit none
    complex, dimension (-ntgrid:,:,:), intent (in) :: phi_in
    complex, dimension (-ntgrid:,:,:), intent (out) :: phi_average
    integer, intent (in), optional :: ik_only
    complex :: phi_avg_line
    integer :: it, ik, ik_only_actual

    ik_only_actual = get_option_with_default(ik_only, -1)

    if (ik_only_actual > 0) then
       phi_average = 0.
       do it = 1,ntheta0
          phi_avg_line = field_line_average(phi_in(:,it,ik_only_actual))
          phi_average(:, it, ik_only_actual) = phi_avg_line
       end do
    else
       do it = 1,ntheta0
          do ik = 1,naky
             phi_average(:, it, ik) = field_line_average(phi_in(:, it, ik))
          end do
       end do
    end if

  end subroutine fieldline_average_phi

  !> Deallocate the stored response matrix (aminv) and associated layout
  !> data, and mark the module as uninitialised so that the response can
  !> be rebuilt, e.g. after a timestep change.
  subroutine reset_init
    use gs2_layouts, only: finish_jfields_layouts
    ! finish_fields_layouts name conflicts with routine in 
    ! this module
    use gs2_layouts, only: gs2lo_ffl => finish_fields_layouts
    use unit_tests, only: debug_message
    implicit none
    integer :: i, j
    integer, parameter :: verbosity=3
    initialized = .false.

    call debug_message(verbosity, &
      'fields_implicit::reset_init starting')
    if (.not. allocated (aminv)) return
    do i = 1, size(aminv)
       if (.not. allocated (aminv(i)%dcell)) cycle
       do j = 1, size(aminv(i)%dcell)
          if (allocated (aminv(i)%dcell(j)%supercell)) &
               deallocate(aminv(i)%dcell(j)%supercell)
       end do
       if (allocated (aminv(i)%dcell)) deallocate (aminv(i)%dcell)
    end do
    deallocate (aminv)

    call gs2lo_ffl
    if (allocated(recvcnts)) deallocate(recvcnts, displs)

    call finish_jfields_layouts
  end subroutine reset_init

  !> Build the response matrix: either read it from file or construct it
  !> class by class by advancing unit impulses in each field with timeadv
  !> and recording the field-equation response, then invert the result
  !> with init_inverse_matrix.
  subroutine init_response_matrix
    use mp, only: barrier, land_allreduce, proc0, nproc, iproc
    use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0
    use dist_fn_arrays, only: g
    use dist_fn, only: M_class, N_class, i_class
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: init_fields_layouts, f_lo
    use gs2_layouts, only: init_jfields_layouts
    use file_utils, only: error_unit
    use array_utils, only: zero_array
    implicit none
    integer :: ig, ifield, it, ik, i, m, n
    complex, dimension(:,:), allocatable :: am
    logical :: endpoint
    logical :: response_was_read

    nfield = 0
    if (has_phi) nfield = nfield + 1
    if (has_apar) nfield = nfield + 1
    if (has_bpar) nfield = nfield + 1
    nidx = (2*ntgrid+1)*nfield
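
    !As an illustrative example (hypothetical numbers): with all three
    !fields active and ntgrid = 16 this gives nfield = 3 and
    !nidx = 3*(2*16+1) = 99.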

    call init_fields_layouts (nfield, nidx, naky, ntheta0, M_class, N_class, i_class, nproc, iproc)
    call init_jfields_layouts (nfield, nidx, naky, ntheta0, i_class, nproc, iproc)
    call finish_fields_layouts

    response_was_read = .false.

    !Either read the response
    if(read_response) then
        call read_response_from_file_imp(could_read = response_was_read)

        ! Ensure we reduce response_was_read across all processors so if _any_
        ! processors couldn't read the matrices we recalculate on all processors.
        call land_allreduce(response_was_read)
        if((.not. response_was_read) .and. proc0) write(error_unit(), &
             '("Could not find response file so reverting to calculation.")')

      !elseif(skip_initialisation) then
       !do i = i_class, 1, -1
          !!Pretty sure this barrier is not needed
          !call barrier
          !!       if (proc0) write(*,*) 'beginning class ',i,' with size ',nidx*N_class(i)
          !!Allocate matrix am. First dimension is basically theta along the entire
          !!connected domain for each field. Second dimension is the local section
          !!of the M_class(i)*N_Class(i)*(2*ntgrid+1)*nfield compound domain.
          !!Clearly this will 
          !allocate (am(nidx*N_class(i), f_lo(i)%llim_proc:f_lo(i)%ulim_alloc))


          !!Do we need to zero all 8 arrays on every loop? This can be more expensive than might think.
          !am = 0.0
          !call init_inverse_matrix (am, i)

          !!Free memory
          !deallocate (am)
       !end do
     end if


     !or calculate it
     if (.not. response_was_read) then
!
! keep storage cost down by doing one class at a time
! Note: could define a superclass (of all classes), a structure containing all am,
! then do this all at once.  This would be faster, especially for large runs in a
! sheared domain, and could be triggered by local_field_solve
!

!<DD> Comments
!A class refers to a class of connected domain.
!These classes are defined by the extent of the connected domain, there can be 
!many members of each class.
!There are i_class classes in total.
!N_class(ic) is a count of how many 2pi domains there are in members of class ic
!M_class(ic) is how many members of class ic there are.
!Sum N_class(ic)*M_class(ic) for ic=1,i_class is naky*ntheta0
!In comments cell refers to a 2pi domain whilst supercell is the connected domain,
!i.e. we have classes of supercells based on the number of cells they contain.

       do i = i_class, 1, -1
          !Pretty sure this barrier is not needed
          call barrier
          !       if (proc0) write(*,*) 'beginning class ',i,' with size ',nidx*N_class(i)
          !Allocate matrix am. First dimension is basically theta along the entire
          !connected domain for each field. Second dimension is the local section
          !of the M_class(i)*N_class(i)*(2*ntgrid+1)*nfield compound domain.
          allocate (am(nidx*N_class(i), f_lo(i)%llim_proc:f_lo(i)%ulim_alloc))


          !Do we need to zero all 8 arrays on every loop? This can be more expensive than you might think.
          call zero_array(am)
          call zero_array(g)
          call zero_array(phi) ; call zero_array(phinew)
          call zero_array(apar) ; call zero_array(aparnew)
          call zero_array(bpar) ; call zero_array(bparnew)

          !Loop over individual 2pi domains / cells
          do n = 1, N_class(i)
             !Loop over theta grid points in cell
             !This is like a loop over nidx as we also handle all the fields in this loop
             do ig = -ntgrid, ntgrid
                !Are we at a connected boundary point on the lower side (i.e. left hand end of a
                !tube/cell connected to the left)
                endpoint = n > 1
                endpoint = ig == -ntgrid .and. endpoint

                !Start counting fields
                ifield = 0

                !Find response to phi
                if (has_phi) then
                   ifield = ifield + 1
                   if (endpoint) then
                      !Do all members of supercell together
                      do m = 1, M_class(i)
                         ik = f_lo(i)%ik(m,n-1)
                         it = f_lo(i)%it(m,n-1)
                         phinew(ntgrid,it,ik) = 1.0
                      end do
                   endif
                   !Do all members of supercell together
                   do m = 1, M_class(i)
                      ik = f_lo(i)%ik(m,n)
                      it = f_lo(i)%it(m,n)
                      phinew(ig,it,ik) = 1.0
                   end do
                   if (.not. skip_initialisation) call init_response_row (ig, ifield, am, i, n)
                   call zero_array(phinew)
                end if

                !Find response to apar
                if (has_apar) then
                   ifield = ifield + 1
                   if (endpoint) then
                      !Do all members of supercell together
                      do m = 1, M_class(i)
                         ik = f_lo(i)%ik(m,n-1)
                         it = f_lo(i)%it(m,n-1)
                         aparnew(ntgrid,it,ik) = 1.0
                      end do
                   endif
                   !Do all members of supercell together
                   do m = 1, M_class(i)
                      ik = f_lo(i)%ik(m,n)
                      it = f_lo(i)%it(m,n)
                      aparnew(ig,it,ik) = 1.0
                   end do
                   call init_response_row (ig, ifield, am, i, n)
                   call zero_array(aparnew)
                end if

                !Find response to bpar
                if (has_bpar) then
                   ifield = ifield + 1
                   if (endpoint) then
                      !Do all members of supercell together
                      do m = 1, M_class(i)
                         ik = f_lo(i)%ik(m,n-1)
                         it = f_lo(i)%it(m,n-1)
                         bparnew(ntgrid,it,ik) = 1.0
                      end do
                   endif
                   !Do all members of supercell together
                   do m = 1, M_class(i)
                      ik = f_lo(i)%ik(m,n)
                      it = f_lo(i)%it(m,n)
                      bparnew(ig,it,ik) = 1.0
                   end do
                   call init_response_row (ig, ifield, am, i, n)
                   call zero_array(bparnew)
                end if
             end do
          end do

          !Invert the matrix
          call init_inverse_matrix (am, i)

          !Free memory
          deallocate (am)

       end do
    endif 

    if(dump_response) call dump_response_to_file_imp
  end subroutine init_response_matrix

  !> Advance the current delta-function field configuration one timestep
  !> and store the resulting field-equation values in the appropriate
  !> entries of the response matrix am.
  subroutine init_response_row (ig, ifield, am, ic, n)
    use fields_arrays, only: phi, apar, bpar, phinew, aparnew, bparnew
    use theta_grid, only: ntgrid
    use dist_fn, only: getfieldeq, timeadv, M_class, N_class
    use dist_fn_arrays, only: fieldeq, fieldeqa, fieldeqp
    use run_parameters, only: has_phi, has_apar, has_bpar
    use gs2_layouts, only: f_lo, idx, idx_local
    implicit none
    integer, intent (in) :: ig, ifield, ic, n
    complex, dimension(:,f_lo(ic)%llim_proc:), intent (in out) :: am
    integer :: irow, istart, iflo, ik, it, ifin, m, nn

    !Find response to delta function fields
    !NOTE:Timeadv will loop over all iglo even though only one ik
    !has any amplitude, this is quite a waste. Should ideally do all
    !ik at once
    !NOTE:We currently do each independent supercell of the same length
    !together, this may not be so easy if we do all the ik together but it should
    !be possible.
    call timeadv (phi, apar, bpar, phinew, aparnew, bparnew, 0)
    call getfieldeq (phinew, aparnew, bparnew, fieldeq, fieldeqa, fieldeqp)

    !Loop over 2pi domains / cells
    do nn = 1, N_class(ic)

       !Loop over members of the current class (separate supercells/connected domains)
       do m = 1, M_class(ic)

          !Get corresponding it and ik indices
          it = f_lo(ic)%it(m,nn)
          ik = f_lo(ic)%ik(m,nn)
       
          !Work out which row of the matrix we're looking at, i.e. which
          !of the nidx*N_class(ic) points in the supercell this
          !delta-function response corresponds to.
          irow = ifield + nfield*((ig+ntgrid) + (2*ntgrid+1)*(n-1))
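          !As an illustrative example (hypothetical numbers): with nfield = 2
          !and ntgrid = 2, the point ig = -2, ifield = 1 in cell n = 1 gives
          !irow = 1, whilst ig = -1, ifield = 2 gives irow = 4.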
          
          !Convert iindex and m to iflo index
          iflo = idx (f_lo(ic), irow, m)
          
          !If this is part of our local iflo range then store
          !the response data
          if (idx_local(f_lo(ic), iflo)) then
             !Whereabouts in the supercell does this 2pi*nfield section start
             istart = 0 + nidx*(nn-1)
             
             if (has_phi) then
                ifin = istart + nidx
                istart = istart + 1
                am(istart:ifin:nfield,iflo) = fieldeq(:,it,ik) 
             end if
             
             if (has_apar) then
                ifin = istart + nidx
                istart = istart + 1
                am(istart:ifin:nfield,iflo) = fieldeqa(:,it,ik)
             end if
             
             if (has_bpar) then
                ifin = istart + nidx
                istart = istart + 1
                am(istart:ifin:nfield,iflo) = fieldeqp(:,it,ik)
             end if
             
          end if
                    
       end do
    end do

  end subroutine init_response_row

  !> Invert the response matrix am for class ic using Gauss-Jordan
  !> elimination (distributed over processors), then redistribute the
  !> inverse into the aminv storage used by getfield.
  subroutine init_inverse_matrix (am, ic)
    use file_utils, only: error_unit
    use kt_grids, only: aky, akx
    use theta_grid, only: ntgrid
    use mp, only: broadcast, send, receive, iproc, get_mp_times
    use job_manage, only: time_message
    use fields_arrays, only: time_field_invert, time_field_invert_mpi
    use gs2_layouts, only: f_lo, idx, idx_local, proc_id, jf_lo
    use gs2_layouts, only: if_idx, im_idx, in_idx, local_field_solve
    use gs2_layouts, only: ig_idx, ifield_idx, ij_idx
    use dist_fn, only: i_class, M_class, N_class
    use array_utils, only: zero_array
    use warning_helpers, only: is_not_zero
    implicit none
    integer, intent (in) :: ic
    complex, dimension(:,f_lo(ic)%llim_proc:), intent (in out) :: am
    complex, dimension(:,:), allocatable :: a_inv, lhscol, rhsrow, col_row_tmp
    complex, dimension (:), allocatable :: am_tmp
    complex :: fac
    integer :: i, j, k, ik, it, m, n, nn, if, ig, jsc, jf, jg, jc
    integer :: irow, ilo, jlo, dc, iflo, ierr
    logical :: iskip, jskip
    real :: mp_total, mp_total_after

    call time_message(.false.,time_field_invert,' Field Matrix Invert')
    call get_mp_times(total_time = mp_total)
    
    allocate (lhscol (nidx*N_class(ic),M_class(ic)))
    allocate (rhsrow (nidx*N_class(ic),M_class(ic)))
   
    !This is the length of a supercell
    j = nidx*N_class(ic)

    !Create storage space
    allocate (a_inv(j,f_lo(ic)%llim_proc:f_lo(ic)%ulim_alloc))
    call zero_array(a_inv)
    
    if (.not. skip_initialisation) then
      !Set (ifield*ig,ilo) "diagonal" to 1?
      do ilo = f_lo(ic)%llim_proc, f_lo(ic)%ulim_proc
         a_inv(if_idx(f_lo(ic),ilo),ilo) = 1.0
      end do

      ! Gauss-Jordan elimination, leaving out internal points at multiples of ntgrid 
      ! for each supercell
      !Loop over parallel gridpoints in supercell
      do i = 1, nidx*N_class(ic)
         !iskip is true iff the theta grid point(ig) corresponding to i
         !is at the upper end of a 2pi domain/cell and is not the rightmost gridpoint
         iskip = N_class(ic) > 1 !Are there multiple cells => are there connections/boundaries?
         iskip = i <= nidx*N_class(ic) - nfield .and. iskip !Are we not near the upper boundary of the supercell
         iskip = mod((i+nfield-1)/nfield, 2*ntgrid+1) == 0 .and. iskip !Are we at a theta grid point corresponding to the rightmost point of a 2pi domain
         iskip = i > nfield .and. iskip !Are we not at the lower boundary of the supercell
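         !As an illustrative example (hypothetical numbers): with nfield = 1,
         !ntgrid = 1 (so 3 points per cell) and N_class(ic) = 2 the supercell
         !has 6 points and only i = 3 is skipped -- the upper boundary of the
         !first cell, which duplicates the point i = 4.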
         if (iskip) cycle
   
         if (local_field_solve) then
            do m = 1, M_class(ic)
               ilo = idx(f_lo(ic),i,m)
               if (idx_local(f_lo(ic),ilo)) then
                  lhscol(:,m) = am(:,ilo)
                  rhsrow(:,m) = a_inv(:,ilo)
               end if
            end do
         else
            allocate(col_row_tmp(nidx*N_class(ic),2)) ; col_row_tmp = 0.
            !Loop over classes (supercell lengths)
            do m = 1, M_class(ic)
               !Convert to f_lo index
               ilo = idx(f_lo(ic),i,m)
               !Is ilo on this proc?
               if (idx_local(f_lo(ic),ilo)) then
                  !If so store column/row
                  !lhscol(:,m) = am(:,ilo)
                  !rhsrow(:,m) = a_inv(:,ilo)
                  col_row_tmp(:,1) = am(:,ilo)
                  col_row_tmp(:,2) = a_inv(:,ilo)
               end if
               !Here we send lhscol and rhscol sections to all procs
               !from the one on which it is currently known
               !Can't do this outside m loop as proc_id depends on m
               !These broadcasts can be relatively expensive so local_field_solve
               !may be preferable
               !call broadcast (lhscol(:,m), proc_id(f_lo(ic),ilo))
               !call broadcast (rhsrow(:,m), proc_id(f_lo(ic),ilo))
               call broadcast (col_row_tmp, proc_id(f_lo(ic),ilo))
               lhscol(:,m) = col_row_tmp(:,1)
               rhsrow(:,m) = col_row_tmp(:,2)
            end do
            !All procs will have the same lhscol and rhsrow after this loop+broadcast
            deallocate(col_row_tmp)
         end if

         !Loop over field compound dimension
         do jlo = f_lo(ic)%llim_proc, f_lo(ic)%ulim_proc
            !jskip is true similarly to iskip
            jskip = N_class(ic) > 1 !Are there any connections?
            jskip = ig_idx(f_lo(ic), jlo) == ntgrid .and. jskip !Are we at a theta grid point corresponding to the upper boundary?
            !Get 2pi domain/cell number out of total for this supercell
            n = in_idx(f_lo(ic),jlo)
            jskip = n < N_class(ic) .and. jskip !Are we not in the last cell (i.e. not at the rightmost grid point/upper end of supercell)?
            if (jskip) cycle  !Skip this point if appropriate

            !Now get m (class number)
            m = im_idx(f_lo(ic),jlo)

            !Convert class number and cell number to ik and it
            ik = f_lo(ic)%ik(m,n)
            it = f_lo(ic)%it(m,n)
            
            !Work out what the compound theta*field index is.
            irow = if_idx(f_lo(ic),jlo)

            !If ky or kx are not 0 (i.e. skip the zonal 0,0 mode) then work out the array
            if (is_not_zero(aky(ik)) .or. is_not_zero(akx(it))) then
               !Get factor
               fac = am(i,jlo)/lhscol(i,m)

               !Store array element
               am(i,jlo) = fac

               !Store other elements
               am(:i-1,jlo) = am(:i-1,jlo) - lhscol(:i-1,m)*fac
               am(i+1:,jlo) = am(i+1:,jlo) - lhscol(i+1:,m)*fac
               !WOULD the above three commands be better written as
               !am(:,jlo)=am(:,jlo)-lhscol(:,m)*fac
               !am(i,jlo)=fac

               !Fill in a_inv
               if (irow == i) then
                  a_inv(:,jlo) = a_inv(:,jlo)/lhscol(i,m)
               else
                  a_inv(:,jlo) = a_inv(:,jlo) &
                       - rhsrow(:,m)*lhscol(irow,m)/lhscol(i,m)
               end if
            else
               a_inv(:,jlo) = 0.0
            end if
     
         end do
      end do

      !Free memory
      deallocate (lhscol, rhsrow)

  ! fill in skipped points for each field and supercell:
  ! Do not include internal ntgrid points in sum over supercell

      do i = 1, nidx*N_class(ic)
         !iskip is true iff the theta grid point(ig) corresponding to i
         !is at the upper end of a 2pi domain/cell and is not the rightmost gridpoint
         iskip = N_class(ic) > 1 !Are there multiple cells => are there connections/boundaries?
         iskip = i <= nidx*N_class(ic) - nfield .and. iskip  !Are we not near the upper boundary of the supercell
         iskip = mod((i+nfield-1)/nfield, 2*ntgrid+1) == 0 .and. iskip !Are we at a theta grid point corresponding to the rightmost point of a 2pi domain
         iskip = i > nfield .and. iskip !Are we not at the lower boundary of the supercell
         !Zero out skipped points
         if (iskip) then
            a_inv(i,:) = 0
            cycle !Seems unnecessary
         end if
      end do
  ! Make response at internal ntgrid points identical to response
  ! at internal -ntgrid points:
      do jlo = f_lo(ic)%llim_world, f_lo(ic)%ulim_world
         !jskip is true similarly to iskip
         jskip = N_class(ic) > 1 !Are there any connections?
         jskip = ig_idx(f_lo(ic), jlo) == ntgrid .and. jskip  !Are we at a theta grid point corresponding to the upper boundary?
         jskip = in_idx(f_lo(ic), jlo) < N_class(ic) .and. jskip  !Are we not in the last cell (i.e. not at the rightmost grid point/upper end of supercell)?
         !If we previously skipped this point then we want to fill it in from the matched/connected point
         if (jskip) then
            !What is the index of the matched point?
            ilo = jlo + nfield
            !If we have ilo on this proc send it to...
            if (idx_local(f_lo(ic), ilo)) then
               !jlo on this proc
               if (idx_local(f_lo(ic), jlo)) then
                  a_inv(:,jlo) = a_inv(:,ilo)
               !jlo on proc which has jlo
               else
                  call send(a_inv(:,ilo), proc_id(f_lo(ic), jlo))
               endif
            else
               !If this proc has jlo then get ready to receive
               if (idx_local(f_lo(ic), jlo)) then
                  call receive(a_inv(:,jlo), proc_id(f_lo(ic), ilo))
               end if
            end if
         end if
      end do
      !The sends/receives in the above loop should be able to function in a
      !non-blocking manner fairly easily, but probably don't cost that much
      !Would require WAITALL before doing am=a_inv line below

      !Update am
      am = a_inv
    end if ! .not. skip_initialisation

    !Free memory
    deallocate (a_inv)

! Re-sort this class of aminv for runtime application.  

    !Now allocate array to store matrices for each class
    if (.not.allocated(aminv)) allocate (aminv(i_class))

! only need this large array for particular values of jlo.
! To save space, count how many this is and only allocate
! required space:

    !Initialise counter
    dc = 0
! check all members of this class
    do ilo = f_lo(ic)%llim_world, f_lo(ic)%ulim_world

! find supercell coordinates
       !i.e. what is my class of supercell and which cell am I looking at
       m = im_idx(f_lo(ic), ilo)
       n = in_idx(f_lo(ic), ilo)

! find standard coordinates
       !Get theta, field, kx and ky indexes for current point
       ig = ig_idx(f_lo(ic), ilo)
       if = ifield_idx(f_lo(ic), ilo)
       ik = f_lo(ic)%ik(m,n)
       it = f_lo(ic)%it(m,n)

! translate to fast field coordinates
       jlo = ij_idx(jf_lo, ig, if, ik, it)
          
! Locate this jlo, count it, and save address
       !Is this point on this proc, if so increment counter
       if (idx_local(jf_lo,jlo)) then
! count it
          dc = dc + 1
! save dcell address
          jf_lo%dj(ic,jlo) = dc
! save supercell address
          jf_lo%mj(jlo) = m
       endif
          
    end do

! allocate dcells and supercells in this class on this PE:
    !Loop over "fast field" index
    do jlo = jf_lo%llim_proc, jf_lo%ulim_proc
          
       !Allocate storage in this class, on this proc, for the jlo points
       if (.not. allocated(aminv(ic)%dcell)) then
          allocate (aminv(ic)%dcell(dc))
       else
          !Just check the array is the correct size
          j = size(aminv(ic)%dcell)
          if (j /= dc) then
             ierr = error_unit()
             write(ierr,*) 'Error (1) in init_inverse_matrix: ',&
                  iproc,':',jlo,':',dc,':',j
          endif
       endif
       
       !Get the current "dcell" address
       k = jf_lo%dj(ic,jlo)

       !No dcell address should be 0, but guard against it anyway
       if (k > 0) then
          !How long is the supercell for this class?
          jc = nidx*N_class(ic)

          !Allocate storage for the supercell if required
          if (.not. allocated(aminv(ic)%dcell(k)%supercell)) then
             allocate (aminv(ic)%dcell(k)%supercell(jc))
          else
             !Just check the array is the correct size
             j = size(aminv(ic)%dcell(k)%supercell)
             if (j /= jc) then
                ierr = error_unit()
                write(ierr,*) 'Error (2) in init_inverse_matrix: ', &
                     iproc,':',jlo,':',jc,':',j
             end if
          end if
       end if
    end do

! Now fill aminv for this class:

    !Allocate temporary supercell storage
    allocate (am_tmp(nidx*N_class(ic)))

    !Loop over all grid points
    do ilo = f_lo(ic)%llim_world, f_lo(ic)%ulim_world

       !Get supercell type (class) and cell index
       m = im_idx(f_lo(ic), ilo)
       n = in_idx(f_lo(ic), ilo)
       
       !Convert to theta,field,kx and ky indexes
       ig = ig_idx(f_lo(ic), ilo)
       if = ifield_idx(f_lo(ic), ilo)
       ik = f_lo(ic)%ik(m,n)
       it = f_lo(ic)%it(m,n)
       
       !Get fast field index
       iflo = ij_idx(jf_lo, ig, if, ik, it)
 
       !If this ilo is local then...
       if (idx_local(f_lo(ic),ilo)) then
          ! send the am data to...
          if (idx_local(jf_lo,iflo)) then
             !the local proc
             am_tmp = am(:,ilo)
          else
             !the remote proc
             call send(am(:,ilo), proc_id(jf_lo,iflo))
          endif
       else
          !Get ready to receive the data
          if (idx_local(jf_lo,iflo)) then
             call receive(am_tmp, proc_id(f_lo(ic),ilo))
          end if
       end if

       !If the fast field index is on this processor
       if (idx_local(jf_lo, iflo)) then
          !Get "dcell" adress
          dc = jf_lo%dj(ic,iflo)

          !Loop over supercell size
          do jlo = 0, nidx*N_class(ic)-1
             !Convert to cell/2pi domain index
             nn = in_idx(f_lo(ic), jlo)
             
             !Get theta grid point
             jg = ig_idx(f_lo(ic), jlo)
             !Get field index
             jf = ifield_idx(f_lo(ic), jlo)
             
             !Convert index
             jsc = ij_idx(f_lo(ic), jg, jf, nn) + 1

             !Store inverse matrix data in appropriate supercell position
             aminv(ic)%dcell(dc)%supercell(jsc) = am_tmp(jlo+1)
             
          end do
       end if
    end do

    !Free memory
    deallocate (am_tmp)

    call time_message(.false.,time_field_invert,' Field Matrix Invert')
    call get_mp_times(total_time = mp_total_after)
    time_field_invert_mpi = time_field_invert_mpi + (mp_total_after - mp_total)
  end subroutine init_inverse_matrix

  !> Fill the (ik, it) lookup tables of f_lo for each class, ordering the
  !> supercells within each class from their leftmost cell, and initialise
  !> the ij map of jf_lo.
  subroutine finish_fields_layouts
    use dist_fn, only: N_class, i_class, itright, has_linked_boundary
    use kt_grids, only: naky, ntheta0
    use gs2_layouts, only: f_lo, jf_lo, ik_idx, it_idx
    implicit none
    integer :: i, m, n, ii, ik, it, itr, jflo

    if (has_linked_boundary()) then

! Complication comes from having to order the supercells in each class
       do ii = 1, i_class
          m = 1
          do it = 1, ntheta0
             do ik = 1, naky
                call kt2ki (i, n, ik, it)
                ! If (ik, it) is in this class, continue:
                if (i == ii) then
                   ! Find left end of links
                   if (n == 1) then
                      f_lo(i)%ik(m,n) = ik
                      f_lo(i)%it(m,n) = it
                      itr = it
                      ! Follow links to the right
                      do n = 2, N_class(i)
                         itr = itright (itr, ik)
                         f_lo(i)%ik(m,n) = ik
                         f_lo(i)%it(m,n) = itr
                      end do
                      m = m + 1
                   end if
                end if
             end do
          end do
       end do
       
    ! initialize ij matrix
       
       do jflo = jf_lo%llim_proc, jf_lo%ulim_proc
          ik = ik_idx(jf_lo, jflo)
          it = it_idx(jf_lo, jflo)
          
          call kt2ki ( jf_lo%ij(jflo), n, ik, it)
          
       end do

    else
       m = 0
       do it = 1, ntheta0
          do ik = 1, naky
             m = m + 1
             f_lo(1)%ik(m,1) = ik
             f_lo(1)%it(m,1) = it
          end do
       end do
       
        jf_lo%ij = 1
    end if

  end subroutine finish_fields_layouts

  !> Given the mode indices ik and it, return the class index i of the
  !> supercell containing this mode and its cell position n within the
  !> supercell, counting from the left.
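  !> For example, a mode with l_links = 2 and r_links = 1 lies in a
  !> supercell of nn = 4 cells, at position n = 3 counting from the left.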
  subroutine kt2ki (i, n, ik, it)
    use mp, only: mp_abort
    use file_utils, only: error_unit
    use dist_fn, only: l_links, r_links, N_class, i_class
    implicit none
    integer, intent (in) :: ik, it
    integer, intent (out) :: i, n
    integer :: nn, ierr
!
! Get size of this supercell
!
    nn = 1 + l_links(it, ik) + r_links(it, ik)
!
! Find i such that N_class(i) == nn
!
    do i = 1, i_class
       if (N_class(i) == nn) exit
    end do
!
! Consistency check:
!
    if (N_class(i) /= nn) then
       ierr = error_unit()
       write(ierr,*) 'Error in kt2ki:'
       write(ierr,*) 'i = ',i,' ik = ',ik,' it = ',it,&
            ' N(i) = ',N_class(i),' nn = ',nn
       call mp_abort('Error in kt2ki')
    end if
! 
! Get position in this supercell, counting from the left
!
    n = 1 + l_links(it, ik)

  end subroutine kt2ki

  !> A routine to dump the current response matrix to file
  subroutine dump_response_to_file_imp(suffix)
    use fields_arrays, only: get_specific_response_file_name, time_dump_response
    use gs2_time, only: code_dt
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0
    use dist_fn, only: i_class, N_class, M_class, get_leftmost_it, itright
    use gs2_layouts, only: f_lo, jf_lo, ij_idx, idx_local, idx, proc_id
    use mp, only: proc0, send, receive
    use gs2_save, only: gs2_save_response
    use job_manage, only: time_message
    implicit none
    character(len=*), optional, intent(in) :: suffix !If passed then use as part of file suffix
    complex, dimension(:,:), allocatable :: tmp_arr, tmp_arr_full
    complex, dimension(:), allocatable :: tmp_vec_full, tmp_vec
    integer :: ic, im, ik, it, itmin, supercell_length, supercell_length_bound, in, ifld, ig, is_tmp
    integer :: jflo, dc, nn, in_tmp, icount, it_tmp, nl, nr, ifld_tmp, ext_dom_length, ig_tmp, cur_idx
    integer, dimension(:,:), allocatable :: it_to_is, leftmost_it
    integer, dimension(:), allocatable :: tmp_ints
    logical :: is_local
    call time_message(.false.,time_dump_response,' Field Dump')

    !Make a lookup array to convert itmin (the leftmost it in a connected domain)
    !to the supercell index "is" used in the local fields. This will be used to 
    !ensure equivalent files can be given the same name.
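    !As an illustrative example (hypothetical numbers): if for some ik
    !leftmost_it(:,ik) = (/1, 2, 1, 2, 5/) then the scan below gives
    !it_to_is(:,ik) = (/1, 2, 1, 2, 3/).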
    allocate(it_to_is(ntheta0,naky),leftmost_it(ntheta0,naky),tmp_ints(ntheta0))
    it_to_is=0
    !//Note the following code is mostly borrowed from fm_init in the local fields
    
    !First find all the leftmost it
    do ik=1,naky
       do it=1,ntheta0
          leftmost_it(it,ik)=get_leftmost_it(it,ik)
       enddo
    enddo

    !Now find supercell ids for each ky at a time
    do ik=1,naky
       tmp_ints=leftmost_it(:,ik)
       it_tmp=0
       is_tmp=0
       do while(sum(tmp_ints)/=-1*ntheta0)
          it_tmp=it_tmp+1
          cur_idx=tmp_ints(it_tmp)

          !If we've seen this domain skip
          if(cur_idx==-1)cycle

          !Increment counter
          is_tmp=is_tmp+1

          !Here we store the value
          it_to_is(it_tmp,ik)=is_tmp

          !Now we set all other connected locations to -1
          !and store the appropriate is value
          do it=1,ntheta0
             if(tmp_ints(it)==cur_idx) then
                tmp_ints(it)=-1
                it_to_is(it,ik)=is_tmp
             endif
          enddo
       enddo
    enddo

    !Cleanup
    deallocate(tmp_ints)

    !/End of borrowed code

    !Notation recap:
    ! A class refers to all independent domains with the same length
    ! i_class is how many classes we have
    ! N_class(i_class) is how many 2Pi domains are in each member of i_class
    ! M_class(i_class) is how many independent domains are in i_class

    allocate(tmp_vec(nfield*(2*ntgrid+1)))
    allocate(tmp_arr(1+(2*ntgrid),nfield))

    !Loop over classes (supercell length)
    do ic=1,i_class
       !Extended domain length
       ext_dom_length=1+(2*ntgrid)*N_class(ic)
       !Work out how long the supercell is
       supercell_length=nfield*ext_dom_length !Without boundary points
       supercell_length_bound=(1+2*ntgrid)*nfield*N_class(ic) !With boundary points
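
       !As an illustrative example (hypothetical numbers): with ntgrid = 16,
       !nfield = 1 and N_class(ic) = 2 this gives ext_dom_length = 1+32*2 = 65,
       !supercell_length = 65 and supercell_length_bound = 33*1*2 = 66.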

       !Make storage
       allocate(tmp_arr_full(supercell_length,supercell_length))
       allocate(tmp_vec_full(supercell_length))

       !Now loop over all members of this class
       do im=1,M_class(ic)
          !Now we are thinking about a single supercell
          !we can get certain properties before looping
          !over the individual elements
          
          !Get the ik index
          ik=f_lo(ic)%ik(im,1)

          !Get the leftmost it index (named itmin to match local field routines)
          !This is currently used to identify the supercell like "is" is used in
          !the local field routines. It would be nice to also use "is" here (or
          !"itmin" there).
          itmin=leftmost_it(f_lo(ic)%it(im,1),ik)
          
          !Now we have the basic properties we want to loop over the elements
          !First initialise "it"
          it=itmin

          !Initialise counter
          icount=1

          !The order of the field, cell and theta loops below is chosen
          !in order to match the data layout of the fields local matrix

          !Loop over the fields
          do ifld=1,nfield

             !Reinitialise "it" back to the start of the supercell chain
             it=itmin

             !Loop over the different it (2Pi domains)
             do in=1,N_class(ic)
                !Loop over theta
                do ig=-ntgrid,ntgrid
                   !Skip the duplicate boundary points
                   if((ig==ntgrid).and.(in/=N_class(ic))) cycle

                   !Convert to jf_lo index
                   jflo=ij_idx(jf_lo,ig,ifld,ik,it)

                   !See if it's local
                   is_local=idx_local(jf_lo,jflo)

                   !If it's not local then we have nothing to do
                   !unless we're the proc who writes (proc0).
                   if(.not.(is_local.or.proc0)) cycle

                   !Now pack tmp_vec and do communications if needed
                   if(is_local)then
                      !Get dcell index
                      dc=jf_lo%dj(ic,jflo)

                      !Now we pack the tmp_vec in the correct order
                      !whilst ignoring the repeated boundary points
                      !We need to pick the value of "n" in the right order
                      it_tmp=itmin
                      do in_tmp=1,N_class(ic)
                         !Pick the correct n
                         do nn=1,N_class(ic)
                            if(f_lo(ic)%it(im,nn)==it_tmp) exit
                         enddo

                         !Now we can get supercell range (including boundaries)
                         nl=1+nidx*(nn-1)
                         nr=nl+nidx-1
                         
                         !Extract section
                         tmp_vec=aminv(ic)%dcell(dc)%supercell(nl:nr)

                         !All that remains now is to ignore the boundary points
                         !To do this we just split on the field so we can ignore
                         !boundary if we want
                         do ifld_tmp=1,nfield
                            nl=1+(ifld_tmp-1)*(2*ntgrid+1)
                            nr=nl+2*ntgrid
                            tmp_arr(:,ifld_tmp)=tmp_vec(nl:nr)
                         enddo

                         !Now we need to work out where to put things in tmp_vec_full
                         !In doing this we try to match the local fields data layout
                         !to aid comparisons
                         do ifld_tmp=1,nfield
                            do ig_tmp=1,2*ntgrid+1
                               !Skip boundary points
                               if((ig_tmp==(2*ntgrid+1)).and.(in_tmp/=N_class(ic))) cycle

                               !Get index
                               cur_idx=ig_tmp+(2*ntgrid)*(in_tmp-1)+(ifld_tmp-1)*ext_dom_length

                               !Store data
                               tmp_vec_full(cur_idx)=tmp_arr(ig_tmp,ifld_tmp)
                            enddo
                         enddo

                         !If we're not at the last cell then increment it_tmp.
                         !This check is needed to avoid trying to access itright
                         !in non-box simulations where this array is not allocated.
                         if(in_tmp < N_class(ic)) then
                            it_tmp=itright(it_tmp, ik)
                         end if

                      enddo

                      !No comms needed if on proc0
                      if(.not.proc0) call send(tmp_vec_full,0)
                   else
                      !Only proc0 should get here but test anyway
                      if(proc0) call receive(tmp_vec_full,proc_id(jf_lo,jflo))
                   endif

                   !Now we need to store in the full array
                   !May need to check index order matches local case.
                   if(proc0) then
                      tmp_arr_full(:,icount)=tmp_vec_full
                   endif

                   !Increment counter
                   icount=icount+1
                enddo

                !If we're not at the last cell then increment it.
                !This check is needed to avoid trying to access itright
                !in non-box simulations where this array is not allocated.
                if(in < N_class(ic)) then
                   it=itright(it, ik)
                end if
             enddo
          enddo

          !Now make file name
          if(proc0)then
             call gs2_save_response(tmp_arr_full, get_specific_response_file_name(ik, it_to_is(itmin, ik), code_dt, suffix), code_dt)
          endif
       end do
          
       deallocate(tmp_arr_full,tmp_vec_full)
    end do

    !Tidy
    deallocate(tmp_vec,tmp_arr,leftmost_it,it_to_is)
    call time_message(.false.,time_dump_response,' Field Dump')
  end subroutine dump_response_to_file_imp

  !> A routine to read the response matrix from file and populate the implicit
  !> response storage, note we also allocate the response storage objects
  subroutine read_response_from_file_imp(could_read, suffix)
    use fields_arrays, only: get_specific_response_file_name, time_read_response
    use gs2_time, only: code_dt
    use theta_grid, only: ntgrid
    use kt_grids, only: naky, ntheta0
    use dist_fn, only: i_class, N_class, M_class, get_leftmost_it, itright
    use gs2_layouts, only: f_lo, jf_lo, ij_idx, idx_local, idx, proc_id
    use mp, only: proc0, send, receive
    use gs2_save, only: gs2_restore_response
    use file_utils, only: error_unit
    use job_manage, only: time_message
    implicit none
    logical, intent(out) :: could_read
    character(len=*), optional, intent(in) :: suffix !If passed then use as part of file suffix
    complex, dimension(:,:), allocatable :: tmp_arr, tmp_arr_full
    complex, dimension(:), allocatable :: tmp_vec_full, tmp_vec
    integer :: ic, im, ik, it, itmin, supercell_length, supercell_length_bound, in, ifld, ig, is_tmp
    integer :: jflo, dc, nn, in_tmp, icount, it_tmp, nl, nr, ifld_tmp, ext_dom_length, ig_tmp, cur_idx
    integer :: jflo_dup, dc_dup
    integer, dimension(:,:), allocatable :: it_to_is, leftmost_it
    integer, dimension(:), allocatable :: tmp_ints
    logical :: is_local, is_local_dup
    character(len = 256) :: file_name
    logical :: file_found
    real :: file_time_step

    call time_message(.false.,time_read_response,' Field Read')
    could_read = .true.

    !First allocate the matrix storage
    call alloc_response_objects

    !Make a lookup array to convert itmin (the leftmost it in a connected domain)
    !to the supercell index "is" used in the local fields. This will be used to 
    !ensure equivalent files can be given the same name.
    allocate(it_to_is(ntheta0,naky),leftmost_it(ntheta0,naky),tmp_ints(ntheta0))
    it_to_is=0
    !//Note the following code is mostly borrowed from fm_init in the local fields
    
    !First find all the leftmost it
    do ik=1,naky
       do it=1,ntheta0
          leftmost_it(it,ik)=get_leftmost_it(it,ik)
       enddo
    enddo

    !Now find supercell ids for each ky at a time
    do ik=1,naky
       tmp_ints=leftmost_it(:,ik)
       it_tmp=0
       is_tmp=0
       do while(sum(tmp_ints)/=-1*ntheta0)
          it_tmp=it_tmp+1
          cur_idx=tmp_ints(it_tmp)

          !If we've seen this domain skip
          if(cur_idx==-1)cycle

          !Increment counter
          is_tmp=is_tmp+1

          !Here we store the value
          it_to_is(it_tmp,ik)=is_tmp

          !Now we set all other connected locations to -1
          !and store the appropriate is value
          do it=1,ntheta0
             if(tmp_ints(it)==cur_idx) then
                tmp_ints(it)=-1
                it_to_is(it,ik)=is_tmp
             endif
          enddo
       enddo
    enddo

    !Cleanup
    deallocate(tmp_ints)

    !/End of borrowed code

    !Notation recap:
    ! A class refers to all independent domains with the same length
    ! i_class is how many classes we have
    ! N_class(i_class) is how many 2Pi domains are in each member of i_class
    ! M_class(i_class) is how many independent domains are in i_class

    allocate(tmp_vec(nfield*(2*ntgrid+1)))
    allocate(tmp_arr(1+(2*ntgrid),nfield))

    !Loop over classes (supercell length)
    do ic=1,i_class
       !Extended domain length
       ext_dom_length=1+(2*ntgrid)*N_class(ic)
       !Work out how long the supercell is
       supercell_length=nfield*ext_dom_length !Without boundary points
       supercell_length_bound=(1+2*ntgrid)*nfield*N_class(ic) !With boundary points

       !Make storage
       allocate(tmp_arr_full(supercell_length,supercell_length))
       allocate(tmp_vec_full(supercell_length))

       !Now loop over all members of this class
       do im=1,M_class(ic)
          tmp_arr_full=0.
          tmp_vec_full=0.

          !Now we are thinking about a single supercell
          !we can get certain properties before looping
          !over the individual elements
          
          !Get the ik index
          ik=f_lo(ic)%ik(im,1)

          !Get the leftmost it index (named itmin to match local field routines)
          !This is currently used to identify the supercell like "is" is used in
          !the local field routines. It would be nice to also use "is" here (or
          !"itmin" there).
          itmin=leftmost_it(f_lo(ic)%it(im,1),ik)
          
          !Now make file name
          file_name = adjustl(get_specific_response_file_name(ik, it_to_is(itmin, ik), code_dt, suffix))

          ! First check if the expected file exists, if not then exit
          inquire( file = trim(file_name), exist = file_found)
          if (.not. file_found) then
             if(proc0) write(error_unit(), &
                  '("Could not find response file ",A,", reverting to calculation.")') trim(file_name)
             could_read = .false.
             return
          end if

          if(proc0)then
             call gs2_restore_response(tmp_arr_full, file_name, file_time_step)
             ! Should perhaps double check that file_time_step == code_dt?
          endif

          !Initialise counter
          icount=1

          !Loop over the fields
          do ifld=1,nfield
             !Reinitialise "it" back to the start of the supercell chain
             it=itmin

             !Loop over the different it (2Pi domains)
             do in=1,N_class(ic)
                !Loop over theta
                do ig=-ntgrid,ntgrid
                   !We might hope to skip the duplicate boundary points here,
                   !but that does not work in this routine, so the cycle below
                   !is left disabled. !<DD>
!                   if((ig.eq.ntgrid).and.(in.ne.N_class(ic))) cycle

                   !Convert to jf_lo index
                   jflo=ij_idx(jf_lo,ig,ifld,ik,it)

                   !See if it's local
                   is_local=idx_local(jf_lo,jflo)

                   !If it's not local then we have nothing to do
                   !unless we're the proc holding the file data (proc0).
                   if(.not.(is_local.or.proc0)) cycle

                   !Get the stored response data for this point
                   if(proc0)then
                      tmp_vec_full=tmp_arr_full(:,icount)
                      
                      !Increment counter
                      if(.not.(ig==ntgrid.and.in/=N_class(ic))) icount=icount+1
                   endif

                   !Now unpack tmp_vec_full and do communications if needed
                   if(is_local)then
                      !Receive the data from proc0 unless we are proc0
                      !(which already holds it)
                      if(.not.proc0) call receive(tmp_vec_full,0)

                      !Get dcell index
                      dc=jf_lo%dj(ic,jflo)

                      !Now we pack the tmp_vec in the correct order
                      !We must fill in the boundary points
                      !We need to pick the value of "n" in the right order
                      it_tmp=itmin
                      do in_tmp=1,N_class(ic)
                         tmp_arr=0
                         tmp_vec=0

                         !Now we need to work out where to find things in tmp_vec_full
                         !In doing this we try to match the local fields data layout
                         !to aid comparisons
                         do ifld_tmp=1,nfield
                            do ig_tmp=1,2*ntgrid+1
                               !Skip boundary points
                               if((ig_tmp==(2*ntgrid+1)).and.(in_tmp/=N_class(ic))) cycle

                               !Get index
                               cur_idx=ig_tmp+(2*ntgrid)*(in_tmp-1)+(ifld_tmp-1)*ext_dom_length

                               !Store data
                               tmp_arr(ig_tmp,ifld_tmp)=tmp_vec_full(cur_idx)
                            enddo
                         enddo
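
                          !Illustrative indexing check (assuming ntgrid=16, so
                          !2*ntgrid=32): for ifld_tmp=1, cell in_tmp=1 spans
                          !cur_idx=1..33 while cell in_tmp=2 starts at
                          !cur_idx=1+32=33, so each shared boundary point
                          !occupies exactly one slot of tmp_vec_full.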

                         !<DD>It may be anticipated that we need to fix the boundary points
                         !here but we don't actually need to do anything.
                         !Because we sum over the entire supercell in getfield we only want
                         !the repeated boundary point to be included once.
                         !We still need to calculate the field at the repeated point but the
                         !fix for that is handled at the bottom of the routine
                         !In other words we don't need something of the form:
                         ! !Fix boundary points
                         ! if(in_tmp.ne.N_class(ic))then
                         !    do ifld_tmp=1,nfield
                         !       cur_idx=1+(2*ntgrid)*(in_tmp)+(ifld_tmp-1)*ext_dom_length
                         !       tmp_arr(2*ntgrid+1,ifld_tmp)=tmp_vec_full(cur_idx)
                         !    enddo
                         ! endif

                         !Store in correct order
                         do ifld_tmp=1,nfield
                            nl=1+(ifld_tmp-1)*(2*ntgrid+1)
                            nr=nl+2*ntgrid
                            tmp_vec(nl:nr)=tmp_arr(:,ifld_tmp)
                         enddo
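                          !(With the same assumed sizes, field 1 occupies
                          !tmp_vec(1:33) and field 2 occupies tmp_vec(34:66).)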

                          !Pick the correct cell index nn (the cell whose it matches it_tmp)
                         do nn=1,N_class(ic)
                            if(f_lo(ic)%it(im,nn)==it_tmp) exit
                         enddo

                         !Now we can get supercell range (including boundaries)
                         nl=1+nidx*(nn-1)
                         nr=nl+nidx-1
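                          !(e.g. with nidx=33*2=66 under the assumed sizes,
                          !cell nn=2 occupies supercell(67:132))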

                         !Store section
                         aminv(ic)%dcell(dc)%supercell(nl:nr)=tmp_vec

                         !If we're not at the last cell then increment it_tmp.
                         !This check is needed to avoid trying to access itright
                         !in non-box simulations where this array is not allocated.
                         if(in_tmp < N_class(ic)) then
                            it_tmp=itright(it_tmp, ik)
                         end if

                      enddo
                   else
                      !Only proc0 should get here but test anyway
                      if(proc0) call send(tmp_vec_full,proc_id(jf_lo,jflo))
                   endif
                enddo

                !If we're not at the last cell then increment it.
                !This check is needed to avoid trying to access itright
                !in non-box simulations where this array is not allocated.
                if(in < N_class(ic)) then
                   it=itright(it, ik)
                end if
             enddo
          enddo

          !Now we need to fill in the repeated boundary points

          !If there are no repeated boundary points then move on to the next member
          if(N_class(ic)==1) cycle
          it=itmin
          do in=1,N_class(ic)-1
             do ifld=1,nfield
                !First get the index of the point we want to fill
                jflo=ij_idx(jf_lo,ntgrid,ifld,ik,it)

                !Now we get the index of the point which has this data
                jflo_dup=ij_idx(jf_lo,-ntgrid,ifld,ik,itright(it, ik))
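
                !These two indices refer to the same physical point: the
                !ig=ntgrid end of cell it coincides with the ig=-ntgrid start
                !of the cell to its right, so we copy rather than recompute.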

                !Now get locality
                is_local=idx_local(jf_lo,jflo)
                is_local_dup=idx_local(jf_lo,jflo_dup)

                !Get dcell values
                if(is_local) dc=jf_lo%dj(ic,jflo)
                if(is_local_dup) dc_dup=jf_lo%dj(ic,jflo_dup)

                !Now copy/communicate
                if(is_local)then
                   if(is_local_dup)then
                      aminv(ic)%dcell(dc)%supercell=aminv(ic)%dcell(dc_dup)%supercell
                   else
                      call receive(aminv(ic)%dcell(dc)%supercell,proc_id(jf_lo,jflo_dup))
                   endif
                elseif(is_local_dup)then
                   call send(aminv(ic)%dcell(dc_dup)%supercell,proc_id(jf_lo,jflo))
                endif
             enddo

             !Increment it -- note we don't need to guard against access to itright
             !here as for non-box runs we won't enter the outer in=1,N_class(ic)-1
             !loop as N_class(ic) == 1
             it=itright(it,ik)
          enddo
       end do
       
       !Free
       deallocate(tmp_arr_full,tmp_vec_full)
    end do

    !Tidy
    deallocate(tmp_vec,tmp_arr,leftmost_it,it_to_is)
    call time_message(.false.,time_read_response,' Field Read')
  end subroutine read_response_from_file_imp

  !> A subroutine to allocate the response matrix storage objects
  subroutine alloc_response_objects
    use dist_fn, only: i_class, N_class
    use gs2_layouts, only: jf_lo, f_lo, im_idx, in_idx, ig_idx, ifield_idx, ij_idx, idx_local
    use theta_grid, only: ntgrid
    implicit none
    integer :: ic, idc, sc_len, ilo, dc, im, in, ig, ifld, ik, it, jlo

    !Top level, one object for each class (length of supercell)
    if(.not.allocated(aminv)) allocate(aminv(i_class))

    !Loop over each class
    do ic=1,i_class
       !Get the supercell length
       sc_len=(2*ntgrid+1)*nfield*N_class(ic)
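
       !(Illustrative: assuming ntgrid=16, nfield=2 and N_class(ic)=3 this
       !gives sc_len = 33*2*3 = 198, matching supercell_length_bound in the
       !read routine above.)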

       !Count how many dcells we have locally and fill the related addressing data
       dc=0
       do ilo=f_lo(ic)%llim_world,f_lo(ic)%ulim_world
          !i.e. which member of the class (im) and which cell within it (in)
          !we are looking at
          im = im_idx(f_lo(ic), ilo)
          in = in_idx(f_lo(ic), ilo)

          ! Find standard coordinates: the theta, field, kx and ky indices
          ! for the current point
          ig = ig_idx(f_lo(ic), ilo)
          ifld = ifield_idx(f_lo(ic), ilo)
          ik = f_lo(ic)%ik(im,in)
          it = f_lo(ic)%it(im,in)
          
          ! translate to fast field coordinates
          jlo = ij_idx(jf_lo, ig, ifld, ik, it)
          
          ! If this point is local then count it and save its dcell address
          if (idx_local(jf_lo,jlo)) then
             ! count it
             dc = dc + 1
             ! save dcell address
             jf_lo%dj(ic,jlo) = dc
             ! save supercell address
             jf_lo%mj(jlo) = im
          endif
       enddo
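
       !At this point dc is the number of jf_lo points on this processor
       !belonging to class ic; each such point has its dcell slot recorded
       !in jf_lo%dj and its supercell member index in jf_lo%mj.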

       !Next level, one object for each point in the class
       if(.not. allocated(aminv(ic)%dcell))then
          allocate(aminv(ic)%dcell(dc))
       endif

       !Now loop over each point and allocate storage for the response data
       do idc=1,dc
          !Bottom level, this is actually where data is stored
          if(.not. allocated(aminv(ic)%dcell(idc)%supercell)) then
             allocate(aminv(ic)%dcell(idc)%supercell(sc_len))
          endif
       enddo
    enddo

  end subroutine alloc_response_objects
end module fields_implicit