Integrate species on xy subcommunicator - NO GATHER
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
complex, | intent(in), | dimension (-ntgrid:,:,g_lo%llim_proc:) | :: | g | ||
real, | intent(inout), | dimension (:) | :: | weights | ||
complex, | intent(out), | dimension (-ntgrid:,:,:) | :: | total |
subroutine integrate_species_sub (g, weights, total)
use theta_grid, only: ntgrid
use gs2_layouts, only: g_lo, intspec_sub
use gs2_layouts, only: is_idx, ik_idx, it_idx, ie_idx, il_idx
use mp, only: sum_allreduce_sub, sum_allreduce
use kt_grids, only: kwork_filter
use species, only: spec, tracer_species
use array_utils, only: zero_array, copy
implicit none
complex, dimension (-ntgrid:,:,g_lo%llim_proc:), intent (in) :: g
real, dimension (:), intent (in out) :: weights
complex, dimension (-ntgrid:,:,:), intent (out) :: total
complex, dimension(:,:,:),allocatable :: total_small
integer :: is, il, ie, ik, it, iglo
!Allocate array and ensure is zero
if(intspec_sub)then
! total(:,g_lo%it_min:g_lo%it_max,g_lo%ik_min:g_lo%ik_max)=0.
allocate(total_small(-ntgrid:ntgrid,g_lo%it_min:g_lo%it_max,g_lo%ik_min:g_lo%ik_max))
else
!total=0.
allocate(total_small(-ntgrid:ntgrid,g_lo%ntheta0,g_lo%naky))
endif
call zero_array(total_small)
where (spec%type == tracer_species) weights = 0
!Performed integral (weighted sum) over local velocity space and species
if(any(kwork_filter))then
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(iglo, it, ik, ie, il, is) &
!$OMP SHARED(g_lo, kwork_filter, weights, w, wl, g) &
!$OMP REDUCTION(+ : total_small) &
!$OMP SCHEDULE(static)
do iglo = g_lo%llim_proc, g_lo%ulim_proc
!Convert from iglo to the separate indices
ik = ik_idx(g_lo,iglo)
it = it_idx(g_lo,iglo)
if(kwork_filter(it,ik)) cycle
is = is_idx(g_lo,iglo)
ie = ie_idx(g_lo,iglo)
il = il_idx(g_lo,iglo)
!Sum up weighted g
total_small(:, it, ik) = total_small(:, it, ik) + &
(weights(is)*w(ie,is))*wl(:,il)*(g(:,1,iglo)+g(:,2,iglo))
end do
!$OMP END PARALLEL DO
else
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP PRIVATE(iglo, it, ik, ie, il, is) &
!$OMP SHARED(g_lo, weights, w, wl, g) &
!$OMP REDUCTION(+ : total_small) &
!$OMP SCHEDULE(static)
do iglo = g_lo%llim_proc, g_lo%ulim_proc
!Convert from iglo to the separate indices
is = is_idx(g_lo,iglo)
ik = ik_idx(g_lo,iglo)
it = it_idx(g_lo,iglo)
ie = ie_idx(g_lo,iglo)
il = il_idx(g_lo,iglo)
!Sum up weighted g
total_small(:, it, ik) = total_small(:, it, ik) + &
(weights(is)*w(ie,is))*wl(:,il)*(g(:,1,iglo)+g(:,2,iglo))
end do
!$OMP END PARALLEL DO
endif
!Reduce sum across all procs in sub communicator to make integral over all velocity space and species
if(intspec_sub)then
call sum_allreduce_sub(total_small,g_lo%xyblock_comm)
else
call sum_allreduce(total_small)
endif
!Copy data into output array
!Note: When not using sub-comms this is an added expense which will mean
!this routine is more expensive than original version just using total.
!In practice we should have two integrate_species routines, one for sub-comms
!and one for world-comms.
if(intspec_sub)then
total(:,g_lo%it_min:g_lo%it_max,g_lo%ik_min:g_lo%ik_max)=total_small
else
call copy(total_small, total)
endif
!Deallocate
deallocate(total_small)
end subroutine integrate_species_sub