init_diffuse_conserve Subroutine

private subroutine init_diffuse_conserve()

Precompute three quantities needed for momentum and energy conservation: bz0, bw0, bs0 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get z0 (first form) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, ie, il, is, isgn, ig) & $OMP SHARED(g_lo, ntgrid, forbid, bz0tmp, code_dt, vnmult, vnew_E, & $OMP aj0, duinv, conserve_forbid_zero) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get v0z0 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bz0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine z0 = z0 / (1 + v0z0) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bz0tmp, dtmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! redefine dq = du (for momentum-conserving terms) du == int (E nu_s f_0); du = du(z, kx, ky, s) duinv = 1/du $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vns, vpa) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get s0 (first form) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, ie, is, isgn) & $OMP SHARED(g_lo, bs0tmp, vns, vpa, aj0, code_dt, duinv) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get v0s0 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bs0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine s0 = s0 - v0s0 * z0 / (1 + v0z0) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bs0tmp, dtmp, bz0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get v1s0 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vns, vpa, aj0, bs0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine s0 = s0 / (1 + v0s0) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bs0tmp, dtmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get w0 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, ie, il, is, isgn, ig) & $OMP SHARED(g_lo, ntgrid, forbid, bw0tmp, vns, energy, al, aj1, code_dt, & $OMP spec, kperp2, duinv, bmag, conserve_forbid_zero) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Now get v0w0 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bw0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine w0 = w0 - v0w0 * z0 / (1 + v0z0) (this is w1 from MAB notes) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bw0tmp, bz0tmp, dtmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Get v1w1 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, is, isgn) & $OMP SHARED(g_lo, gtmp, vns, vpa, aj0, bw0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine w0 = w1 - v1w1 * s1 / (1 + v1s1) (this is w2 from MAB notes) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bw0tmp, bs0tmp, dtmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Get v2w2 $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, ie, il, is, isgn) & $OMP SHARED(g_lo, gtmp, vns, energy, al, aj1, bw0tmp) & $OMP SCHEDULE(static) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Redefine w0 = w2 / (1 + v2w2) $OMP PARALLEL DO DEFAULT(none) & $OMP PRIVATE(iglo, ik, it, is, isgn) & $OMP SHARED(g_lo, bw0tmp, dtmp) & $OMP SCHEDULE(static)

Arguments

None

Contents

Source Code


Source Code

  subroutine init_diffuse_conserve
    use gs2_layouts, only: g_lo, ie_idx, is_idx, ik_idx, il_idx, it_idx
    use species, only: nspec, spec
    use kt_grids, only: naky, ntheta0, kperp2
    use theta_grid, only: ntgrid, bmag
    use le_grids, only: energy => energy_maxw, al, integrate_moment, negrid, forbid
    use gs2_time, only: code_dt
    use dist_fn_arrays, only: aj0, aj1, vpa
    use le_grids, only: g2le
    use gs2_layouts, only: le_lo
    use redistribute, only: gather, scatter
    use array_utils, only: zero_array
    implicit none
    real, dimension (:,:,:), allocatable :: gtmp
    real, dimension (:,:,:,:), allocatable :: duinv, dtmp, vns
    integer :: ie, il, ik, is, isgn, iglo, it, ig
    complex, dimension (:,:,:), allocatable :: ctmp, z_big
    complex, dimension (:,:,:), allocatable :: bs0tmp, bw0tmp, bz0tmp
    logical, parameter :: all_procs = .true.

    if(use_le_layout) then
       allocate (ctmp(nxi_lim, negrid+1, le_lo%llim_proc:le_lo%ulim_alloc))
       ! We need to initialise ctmp as it is used as receiving buffer in
       ! g2le redistribute, which doesn't populate all elements
       call zero_array(ctmp)
    end  if

    allocate(bs0tmp(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
    allocate(bw0tmp(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
    allocate(bz0tmp(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))

    allocate (gtmp(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
    allocate (duinv(-ntgrid:ntgrid, ntheta0, naky, nspec))
    allocate (dtmp(-ntgrid:ntgrid, ntheta0, naky, nspec))
    allocate (vns(naky,negrid,nspec,2))

    ! Following might only be needed if any kwork_filter
    call zero_array(duinv)
    call zero_array(dtmp)

    vns(:,:,:,1) = vnmult(2)*delvnew
    vns(:,:,:,2) = vnmult(2)*vnew_s

    ! first obtain 1/du
    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, energy, vnmult, vnew_E) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          gtmp(:,isgn,iglo) = energy(ie)*vnmult(2)*vnew_E(ik,ie,is)
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, duinv, all_procs)  ! not 1/du yet

    ! Could replace this with OpenMP using an explicit loop. TAG
    where (abs(duinv) > epsilon(0.0))  ! necessary b/c some species may have vnewk=0
       !  duinv=0 iff vnew=0 so ok to keep duinv=0.
       duinv = 1 / duinv  ! now it is 1/du
    end where

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! Now get z0 (first form)
    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, ie, il, is, isgn, ig) &
    !$OMP SHARED(g_lo, ntgrid, forbid, bz0tmp, code_dt, vnmult, vnew_E, &
    !$OMP aj0, duinv, conserve_forbid_zero) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       it = it_idx(g_lo,iglo)
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       il = il_idx(g_lo,iglo)
       do isgn = 1, 2
          do ig=-ntgrid,ntgrid
             ! u0 = -nu_E E dt J0 f_0 / du
             if ( .not. (forbid(ig,il) .and. conserve_forbid_zero) ) then
                bz0tmp(ig,isgn,iglo) = -code_dt*vnmult(2)*vnew_E(ik,ie,is) &
                     * aj0(ig,iglo)*duinv(ig,it,ik,is)
             else
                bz0tmp(ig,isgn,iglo) = 0.
             endif
          end do
       end do
    end do
    !$OMP END PARALLEL DO

    call zero_out_passing_hybrid_electrons(bz0tmp)

    if(use_le_layout) then
       call gather (g2le, bz0tmp, ctmp)
       call solfp_ediffuse_le_layout (ctmp)
       call scatter (g2le, ctmp, bz0tmp)   ! bz0 is redefined below
    else
       call solfp_ediffuse_standard_layout (bz0tmp)   ! bz0 is redefined below
    end if

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get v0z0

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bz0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v0 = nu_E E J0 f_0
          gtmp(:,isgn,iglo) = vnmult(2) * vnew_E(ik,ie,is) * aj0(:,iglo) &
               * real(bz0tmp(:,isgn,iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs) ! v0z0

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine z0 = z0 / (1 + v0z0)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bz0tmp, dtmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       it = it_idx(g_lo,iglo)
       ik = ik_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          bz0tmp(:,isgn,iglo) = bz0tmp(:,isgn,iglo) / (1.0 + dtmp(:,it,ik,is))
       end do
    end do
    !$OMP END PARALLEL DO

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    ! redefine dq = du (for momentum-conserving terms)
    ! du == int (E nu_s f_0);  du = du(z, kx, ky, s)
    ! duinv = 1/du
    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vns, vpa) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          gtmp(:, isgn, iglo)  = vns(ik, ie, is, 1) * vpa(:, isgn, iglo)**2
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, duinv, all_procs)  ! not 1/du yet

    ! Could replace this with OpenMP using an explicit loop. TAG
    where (abs(duinv) > epsilon(0.0))  ! necessary b/c some species may have vnewk=0
       !  duinv=0 iff vnew=0 so ok to keep duinv=0.
       duinv = 1 / duinv  ! now it is 1/du
    end where

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get s0 (first form)
    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, ie, is, isgn) &
    !$OMP SHARED(g_lo, bs0tmp, vns, vpa, aj0, code_dt, duinv) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       it = it_idx(g_lo,iglo)
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! u1 = -3 nu_s vpa dt J0 f_0 / du
          bs0tmp(:, isgn, iglo) = -vns(ik, ie, is, 1) * vpa(:, isgn, iglo) &
               * aj0(:, iglo) * code_dt * duinv(:, it, ik, is)
       end do
    end do
    !$OMP END PARALLEL DO

    call zero_out_passing_hybrid_electrons(bs0tmp)

    if(use_le_layout) then
       call gather (g2le, bs0tmp, ctmp)
       call solfp_ediffuse_le_layout (ctmp)
       call scatter (g2le, ctmp, bs0tmp)   ! bs0
    else
       call solfp_ediffuse_standard_layout (bs0tmp)    ! s0
    end if

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get v0s0

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bs0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v0 = nu_E E J0
          gtmp(:, isgn, iglo) = vnmult(2) * vnew_E(ik, ie, is) * aj0(:, iglo) &
               * real(bs0tmp(:, isgn, iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs)    ! v0s0

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine s0 = s0 - v0s0 * z0 / (1 + v0z0)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bs0tmp, dtmp, bz0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       it = it_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn=1,2
          bs0tmp(:, isgn, iglo) = bs0tmp(:, isgn, iglo) - dtmp(:, it, ik, is) &
               * bz0tmp(:, isgn, iglo)
       end do
    end do
    !$OMP END PARALLEL DO

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get v1s0

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vns, vpa, aj0, bs0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v1 = (nu_s - nu_D) vpa J0
          gtmp(:, isgn, iglo) = vns(ik, ie, is, 1) * vpa(:, isgn, iglo) * aj0(:, iglo) &
               * real(bs0tmp(:, isgn, iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs)    ! v1s0

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine s0 = s0 / (1 + v0s0)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bs0tmp, dtmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       it = it_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn=1,2
          bs0tmp(:, isgn, iglo) = bs0tmp(:, isgn, iglo) / (1.0 + dtmp(:, it, ik, is))
       end do
    end do
    !$OMP END PARALLEL DO

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get w0
    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, ie, il, is, isgn, ig) &
    !$OMP SHARED(g_lo, ntgrid, forbid, bw0tmp, vns, energy, al, aj1, code_dt, &
    !$OMP spec, kperp2, duinv, bmag, conserve_forbid_zero) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       it = it_idx(g_lo,iglo)
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       il = il_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          do ig=-ntgrid,ntgrid
             ! u0 = -3 dt J1 vperp vus a f0 / du
             if ( .not. (forbid(ig,il) .and. conserve_forbid_zero)) then
                ! Note: energy * al * smz^2 * kperp2 / bmag is alpha^2 where
                ! alpha is the argument to the Bessel function, i.e. aj1 = J1(alpha) / alpha
                ! This appears to leave us with alpha * J1(alpha) whilst Barnes' paper
                ! only includes terms with J1(alpha). Note that alpha = vperp kperp smz/B
                ! so alpha * J1(alpha) = (kperp * smz / B) * vperp J1
                bw0tmp(ig, isgn, iglo) = -vns(ik, ie, is, 1) * energy(ie) * al(il) &
                     * aj1(ig, iglo) * code_dt * spec(is)%smz**2 * kperp2(ig, it, ik) * &
                     duinv(ig, it, ik, is) / bmag(ig)
             else
                bw0tmp(ig, isgn, iglo) = 0.
             endif
          end do
       end do
    end do
    !$OMP END PARALLEL DO

    call zero_out_passing_hybrid_electrons(bw0tmp)

    if(use_le_layout) then
       call gather (g2le, bw0tmp, ctmp)
       call solfp_ediffuse_le_layout (ctmp)
       call scatter (g2le, ctmp, bw0tmp)
    else
       call solfp_ediffuse_standard_layout (bw0tmp)
    end if

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Now get v0w0

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vnmult, vnew_E, aj0, bw0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v0 = nu_E E J0
          gtmp(:, isgn, iglo) = vnmult(2) * vnew_E(ik, ie, is) * aj0(:, iglo) &
               * real(bw0tmp(:, isgn, iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs)    ! v0w0

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine w0 = w0 - v0w0 * z0 / (1 + v0z0) (this is w1 from MAB notes)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bw0tmp, bz0tmp, dtmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       it = it_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          bw0tmp(:, isgn, iglo) = bw0tmp(:, isgn, iglo) - bz0tmp(:, isgn, iglo) &
               * dtmp(:, it, ik, is)
       end do
    end do
    !$OMP END PARALLEL DO

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Get v1w1

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vns, vpa, aj0, bw0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v1 = (nus-nud) vpa J0 f0
          gtmp(:, isgn, iglo) = vns(ik, ie, is, 1) * vpa(:, isgn, iglo) * aj0(:, iglo) &
               * real(bw0tmp(:, isgn, iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs)    ! v1w1

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine w0 = w1 - v1w1 * s1 / (1 + v1s1) (this is w2 from MAB notes)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bw0tmp, bs0tmp, dtmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       it = it_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          bw0tmp(:, isgn, iglo) = bw0tmp(:, isgn, iglo) - bs0tmp(:, isgn, iglo) &
               * dtmp(:, it, ik, is)
       end do
    end do
    !$OMP END PARALLEL DO

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Get v2w2

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, ie, il, is, isgn) &
    !$OMP SHARED(g_lo, gtmp, vns, energy, al, aj1, bw0tmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       ie = ie_idx(g_lo,iglo)
       il = il_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn = 1, 2
          ! v2 = (nus-nud) vperp J1 f0
          ! Note : aj1 = J1(alpha) / alpha, where we have
          ! alpha^2 = energy * al * smz^2 * kperp2 / bmag = vperp^2 smz^2 kperp2 / bmag^2
          ! so energy * al * aj1 = (B / [kperp2 * smz^2]) alpha^2 aj1
          ! = (B / [kperp2 * smz^2]) alpha J1 = vperp J1 / (kperp * smz). As w0tmp
          ! appears to have an extra factor of kperp * smz / B this _may_ work out to
          ! (vperp J1 / B) * ... Is there an extra factor of 1 / B here?
          gtmp(:, isgn, iglo) = vns(ik, ie, is, 1) * energy(ie) * al(il) * aj1(:, iglo) &
               * real(bw0tmp(:, isgn, iglo))
       end do
    end do
    !$OMP END PARALLEL DO

    call integrate_moment (gtmp, dtmp, all_procs)   ! v2w2

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Redefine w0 = w2 / (1 + v2w2)

    !$OMP PARALLEL DO DEFAULT(none) &
    !$OMP PRIVATE(iglo, ik, it, is, isgn) &
    !$OMP SHARED(g_lo, bw0tmp, dtmp) &
    !$OMP SCHEDULE(static)
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
       ik = ik_idx(g_lo,iglo)
       it = it_idx(g_lo,iglo)
       is = is_idx(g_lo,iglo)
       do isgn=1,2
          bw0tmp(:, isgn, iglo) = bw0tmp(:, isgn, iglo) / (1.0 + dtmp(:, it, ik, is))
       end do
    end do
    !$OMP END PARALLEL DO

    deallocate (gtmp, duinv, dtmp, vns)

    if(use_le_layout) then
       allocate (z_big(-ntgrid:ntgrid, 2, g_lo%llim_proc:g_lo%ulim_alloc))
       z_big=cmplx(real(bs0tmp),real(bw0tmp))
       deallocate (bs0tmp)
       deallocate (bw0tmp)
       call gather (g2le, z_big, ctmp)
       deallocate(z_big)
       if (.not. allocated(bs0le)) allocate (bs0le(nxi_lim, negrid+1, le_lo%llim_proc:le_lo%ulim_alloc))
       if (.not. allocated(bw0le)) allocate (bw0le(nxi_lim, negrid+1, le_lo%llim_proc:le_lo%ulim_alloc))
       bs0le = real(ctmp)
       bw0le = aimag(ctmp)

       call gather (g2le, bz0tmp, ctmp)
       deallocate (bz0tmp)
       if (.not. allocated(bz0le)) allocate (bz0le(nxi_lim, negrid+1, le_lo%llim_proc:le_lo%ulim_alloc))
       bz0le = real(ctmp)
       deallocate (ctmp)
    else
       !Only need the real components (imaginary part should be zero, just
       !use complex arrays to allow reuse of existing integrate routines etc.)
       if (.not. allocated(bs0)) then
          allocate (bs0(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
          bs0=real(bs0tmp)
          deallocate(bs0tmp)
       endif

       if (.not. allocated(bw0)) then
          allocate (bw0(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
          bw0=real(bw0tmp)
          deallocate(bw0tmp)
       endif

       if (.not. allocated(bz0)) then
          allocate (bz0(-ntgrid:ntgrid,2,g_lo%llim_proc:g_lo%ulim_alloc))
          bz0=real(bz0tmp)
          deallocate(bz0tmp)
       endif
    end if

  end subroutine init_diffuse_conserve