FIXME : Add documentation
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer, | intent(in) | :: | nmesh |
subroutine nprocs (nmesh)
use nonlinear_terms, only : nonlin
use species, only : nspec
use kt_grids, only: gridopt_switch, gridopt_single, gridopt_range, gridopt_specified, gridopt_box
use kt_grids, only: naky, ntheta0, nx, ny
use le_grids, only: negrid, nlambda
use theta_grid, only: ntgrid
use gs2_layouts, only: layout, init_x_transform_layouts, init_y_transform_layouts, factors
use mp, only: nproc, iproc
implicit none
real :: fac
integer, intent (in) :: nmesh
integer :: nefacs, nlfacs, nkyfacs, nkxfacs, nspfacs, nkxkyfacs
integer, dimension(:,:), allocatable :: facs
integer :: npe, checknpe, i, j
logical :: onlyxoryfac
write (report_unit, fmt="('Layout = ',a5,/)") layout
write (report_unit, fmt="('Recommended #proc up to:',i8)") npmax
if (nonlin) then
write (report_unit, *)
write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")
write (report_unit, fmt="('| Number of | Dimension | Percentage data moved | Recommend unbalanced_xxf | xxf unbalanced | Recommend unbalanced_yxf | yxf unbalanced |')")
write (report_unit, fmt="('| processes | split | by MPI from xxf->yxf | T = true F = false | amount (%) | T = true F = false | amount (%) |')")
write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")
call init_x_transform_layouts(ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, nproc, iproc)
call init_y_transform_layouts(ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny, nproc, iproc)
select case (layout)
case ('lexys')
! write (report_unit, fmt="('Recommended numbers of processors, time on T3E')")
allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (naky, nkyfacs, facs(:,2))
call factors (ntheta0, nkxfacs, facs(:,3))
call factors (negrid, nefacs, facs(:,4))
call factors (nlambda, nlfacs, facs(:,5))
do i=1,nspfacs
npe = facs(i,1)
if (npe .gt. npmax) exit
call report_idle_procs(npe, 's')
end do
do i=2,nkyfacs
npe = facs(i,2)*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'y')
end do
do i=2,nkxfacs
npe = facs(i,3)*naky*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'x')
end do
do i=2,nefacs
npe = facs(i,4)*ntheta0*naky*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'e')
end do
do i=2,nlfacs
npe = facs(i,5)*negrid*ntheta0*naky*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'l')
end do
deallocate (facs)
case ('lxyes')
! write (report_unit, fmt="('Recommended numbers of processors, time on SP2')")
allocate (facs(max(nspec,negrid,naky,ntheta0,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (naky, nkyfacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
call factors (nlambda, nlfacs, facs(:,5))
! faclin = 3.5*(real(nmesh))**1.1/1.e7/5.
fac = 3.5*(real(nmesh))**1.1/1.e7
do i=1,nspfacs
npe = facs(i,1)
if (npe .gt. npmax) exit
call report_idle_procs(npe, 's')
end do
do i=2,nefacs
npe = facs(i,2)*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'e')
end do
do i=2,nkyfacs
npe = facs(i,3)*negrid*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'y')
end do
do i=2,nkxfacs
npe = facs(i,4)*naky*negrid*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'x')
end do
do i=2,nlfacs
npe = facs(i,5)*naky*ntheta0*negrid*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'l')
end do
deallocate (facs)
case ('yxels')
allocate (facs(max(naky,ntheta0,nspec,negrid,nlambda,ntheta0*naky)/2+1,6))
call factors (nspec, nspfacs, facs(:,1))
call factors (nlambda, nlfacs, facs(:,2))
call factors (negrid, nefacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
call factors (naky, nkyfacs, facs(:,5))
call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
fac = 3.5*(real(nmesh))**1.1/1.e7
do i=1,nspfacs
npe = facs(i,1)
if (npe .gt. npmax) exit
call report_idle_procs(npe, 's')
end do
do i=2,nlfacs
npe = facs(i,2)*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'l')
end do
do i=2,nefacs
npe = facs(i,3)*nlambda*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'e')
end do
do i=2,nkxkyfacs
npe = facs(i,6)*nlambda*negrid*nspec
if (npe .gt. npmax) exit
! Check whether this process count would have been generated by the
! plain x or y factorisation or if it is new from the combined x*y
! functionality
onlyxoryfac = .false.
do j=2,nkxfacs
checknpe = facs(j,4)*negrid*nlambda*nspec
if (npe .eq. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
do j=2,nkyfacs
checknpe = facs(j,5)*ntheta0*negrid*nlambda*nspec
if (npe .gt. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
call report_idle_procs(npe, 'x*y', onlyxoryfac)
end do
! do i=2,nkxfacs
! npe = facs(i,4)*negrid*nlambda*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'x')
! end do
! do i=2,nkyfacs
! npe = facs(i,5)*ntheta0*negrid*nlambda*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'y')
! end do
deallocate (facs)
case ('yxles')
allocate (facs(max(ntheta0,naky,nspec,negrid,nlambda,ntheta0*naky)/2+1,6))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (nlambda, nlfacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
call factors (naky, nkyfacs, facs(:,5))
call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
fac = 3.5*(real(nmesh))**1.1/1.e7
do i=1,nspfacs
npe = facs(i,1)
if (npe .gt. npmax) exit
call report_idle_procs(npe, 's')
end do
do i=2,nefacs
npe = facs(i,2)*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'e')
end do
do i=2,nlfacs
npe = facs(i,3)*negrid*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'l')
end do
do i=2,nkxkyfacs
npe = facs(i,6)*nlambda*negrid*nspec
if (npe .gt. npmax) exit
! Check whether this process count would have been generated by the
! plain x or y factorisation or if it is new from the combined x*y
! functionality
onlyxoryfac = .false.
do j=2,nkxfacs
checknpe = facs(j,4)*nlambda*negrid*nspec
if (npe .eq. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
do j=2,nkyfacs
checknpe = facs(j,5)*ntheta0*nlambda*negrid*nspec
if (npe .eq. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
call report_idle_procs(npe, 'x*y', onlyxoryfac)
end do
! do i=2,nkxfacs
! npe = facs(i,4)*nlambda*negrid*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'x')
! end do
! do i=2,nkyfacs
! npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'y')
! end do
deallocate (facs)
case ('xyles')
!CMR, 11/11/2009: add processor recommendations for xyles layout
! NB added recommendations that also parallelise in y and x,
! which may be unwise!
allocate (facs(max(nspec,negrid,nlambda,naky,ntheta0,ntheta0*naky)/2+1,6))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (nlambda, nlfacs, facs(:,3))
call factors (naky, nkyfacs, facs(:,4))
call factors (ntheta0, nkxfacs, facs(:,5))
call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
fac = 3.5*(real(nmesh))**1.1/1.e7
do i=1,nspfacs
npe = facs(i,1)
if (npe .gt. npmax) exit
call report_idle_procs(npe, 's')
end do
do i=2,nefacs
npe = facs(i,2)*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'e')
end do
do i=2,nlfacs
npe = facs(i,3)*negrid*nspec
if (npe .gt. npmax) exit
call report_idle_procs(npe, 'l')
end do
do i=2,nkxkyfacs
npe = facs(i,6)*nlambda*negrid*nspec
if (npe .gt. npmax) exit
! Check whether this process count would have been generated by the
! plain x or y factorisation or if it is new from the combined x*y
! functionality
onlyxoryfac = .false.
do j=2,nkyfacs
checknpe = facs(j,4)*nlambda*negrid*nspec
if (npe .eq. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
do j=2,nkxfacs
checknpe = facs(j,5)*naky*nlambda*negrid*nspec
if (npe .eq. checknpe) then
onlyxoryfac = .true.
exit
end if
end do
call report_idle_procs(npe, 'x*y', onlyxoryfac)
end do
! do i=2,nkyfacs
! npe = facs(i,4)*nlambda*negrid*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'y')
! end do
! do i=2,nkxfacs
! npe = facs(i,5)*naky*nlambda*negrid*nspec
! if (npe .gt. npmax) exit
! call report_idle_procs(npe, 'x')
! end do
deallocate (facs)
end select
write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")
write (report_unit, *)
write (report_unit, fmt="('(*) denotes process counts that are from factors of the combined kx*ky index rather than the ordered kx or ky indices separately.')")
write (report_unit, fmt="('To use the unbalanced functionality set unbalanced_xxf = .true. or unbalanced_yxf = .true. in the &layouts_knobs namelist in your GS2 ')")
write (report_unit, fmt="('input file. You can also set the max_unbalanced_xxf and max_unbalanced_yxf flags in the same namelist in the input file to specify the')")
write (report_unit, fmt="('maximum amount of computational imbalance allowed. These flags specify the maximum imbalance as 1 with no imbalance as 0, so to allow')")
write (report_unit, fmt="('50% imbalanced on the xxf decomposition using the following flags in the input file: ')")
write (report_unit, fmt="(' unbalanced_xxf = .true. ')")
write (report_unit, fmt="(' max_unbalanced_xxf = 0.5 ')")
write (report_unit, fmt="('And likewise for yxf.')")
else
write (report_unit, *)
write (report_unit, fmt="('Recommended numbers of processors:')")
select case (gridopt_switch)
case (gridopt_single)
select case (layout)
case ('lexys', 'yxles', 'lxyes', 'xyles')
allocate (facs(max(negrid,nspec,nlambda)/2+1,3))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (nlambda, nlfacs, facs(:,3))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nlfacs
npe = facs(i,3)*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
case ('yxels')
allocate (facs(max(negrid,nspec,nlambda)/2+1,3))
call factors (nspec, nspfacs, facs(:,1))
call factors (nlambda, nlfacs, facs(:,2))
call factors (negrid, nefacs, facs(:,3))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nlfacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs
npe = facs(i,3)*nlambda*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
end select
case (gridopt_range,gridopt_specified,gridopt_box)
select case (layout)
case ('lexys')
allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (naky, nkyfacs, facs(:,2))
call factors (ntheta0, nkxfacs, facs(:,3))
call factors (negrid, nefacs, facs(:,4))
call factors (nlambda, nlfacs, facs(:,5))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkyfacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkxfacs-1
npe = facs(i,3)*naky*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs-1
npe = facs(i,4)*ntheta0*naky*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nlfacs
npe = facs(i,5)*negrid*ntheta0*naky*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
case ('lxyes')
write (report_unit, *)
allocate (facs(max(nspec,negrid,naky,ntheta0)/2+1,4))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (naky, nkyfacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
! fac = 3.5*(real(nmesh))**1.1/1.e7/5. ! okay for large runs, not small
do i=1,nspfacs-1
npe = facs(i,1)
! if (nmesh/npe > ncut) write &
! & (report_unit, fmt="(' npe = ',i4,' time = ',f8.2,' seconds/time step')") &
! & npe, fac/npe**0.95
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkyfacs-1
npe = facs(i,3)*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkxfacs
npe = facs(i,4)*naky*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
case ('yxles')
allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (nlambda, nlfacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
call factors (naky, nkyfacs, facs(:,5))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nlfacs-1
npe = facs(i,3)*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkxfacs-1
npe = facs(i,4)*nlambda*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkyfacs
npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
case ('xyles')
allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (negrid, nefacs, facs(:,2))
call factors (nlambda, nlfacs, facs(:,3))
call factors (naky, nkyfacs, facs(:,4))
call factors (ntheta0, nkxfacs, facs(:,5))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i8)") npe
end do
do i=1,nefacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i8)") npe
end do
do i=1,nlfacs-1
npe = facs(i,3)*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i8)") npe
end do
do i=1,nkyfacs-1
npe = facs(i,4)*nlambda*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i8)") npe
end do
do i=1,nkxfacs
npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i8)") npe
end do
deallocate (facs)
case ('yxels')
allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
call factors (nspec, nspfacs, facs(:,1))
call factors (nlambda, nlfacs, facs(:,2))
call factors (negrid, nefacs, facs(:,3))
call factors (ntheta0, nkxfacs, facs(:,4))
call factors (naky, nkyfacs, facs(:,5))
do i=1,nspfacs-1
npe = facs(i,1)
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nlfacs-1
npe = facs(i,2)*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nefacs-1
npe = facs(i,3)*nlambda*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkxfacs-1
npe = facs(i,4)*negrid*nlambda*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
do i=1,nkyfacs
npe = facs(i,5)*ntheta0*negrid*nlambda*nspec
if (nmesh/npe > ncut) write (report_unit, fmt="(' npe = ',i4)") npe
end do
deallocate (facs)
end select
end select
end if
end subroutine nprocs