nprocs Subroutine

public subroutine nprocs(nmesh)

FIXME : Add documentation

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: nmesh

Contents

Source Code


Source Code

  subroutine nprocs (nmesh)
    use nonlinear_terms, only : nonlin
    use species, only : nspec
    use kt_grids, only: gridopt_switch, gridopt_single, gridopt_range, gridopt_specified, gridopt_box
    use kt_grids, only: naky, ntheta0, nx, ny
    use le_grids, only: negrid, nlambda
    use theta_grid, only: ntgrid
    use gs2_layouts, only: layout, init_x_transform_layouts, init_y_transform_layouts, factors
    use mp, only: nproc, iproc
    implicit none
    real :: fac
    integer, intent (in) :: nmesh
    integer :: nefacs, nlfacs, nkyfacs, nkxfacs, nspfacs, nkxkyfacs
    integer, dimension(:,:), allocatable :: facs
    integer :: npe, checknpe, i, j
    logical :: onlyxoryfac


    write (report_unit, fmt="('Layout = ',a5,/)") layout 
    write (report_unit, fmt="('Recommended #proc up to:',i8)") npmax 
    if (nonlin) then

       write (report_unit, *) 
       write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")
       write (report_unit, fmt="('|   Number of    | Dimension | Percentage data moved | Recommend unbalanced_xxf | xxf unbalanced | Recommend unbalanced_yxf | yxf unbalanced |')")
       write (report_unit, fmt="('|   processes    |   split   | by MPI from xxf->yxf  |    T = true F = false    |   amount (%)   |    T = true F = false    |   amount (%)   |')")
       write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")


       call init_x_transform_layouts(ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, nproc, iproc)
       call init_y_transform_layouts(ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny, nproc, iproc)

       select case (layout)
       case ('lexys')

          !          write (report_unit, fmt="('Recommended numbers of processors, time on T3E')") 
          allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
          call factors (nspec, nspfacs, facs(:,1))
          call factors (naky, nkyfacs, facs(:,2))
          call factors (ntheta0, nkxfacs, facs(:,3))
          call factors (negrid, nefacs, facs(:,4))
          call factors (nlambda, nlfacs, facs(:,5))
          do i=1,nspfacs
             npe = facs(i,1)
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 's')
          end do
          do i=2,nkyfacs
             npe = facs(i,2)*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'y')
          end do
          do i=2,nkxfacs
             npe = facs(i,3)*naky*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'x')
          end do
          do i=2,nefacs
             npe = facs(i,4)*ntheta0*naky*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'e')
          end do
          do i=2,nlfacs
             npe = facs(i,5)*negrid*ntheta0*naky*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'l')
          end do
          deallocate (facs)

       case ('lxyes')

          !          write (report_unit, fmt="('Recommended numbers of processors, time on SP2')") 
          allocate (facs(max(nspec,negrid,naky,ntheta0,nlambda)/2+1,5))
          call factors (nspec, nspfacs, facs(:,1))
          call factors (negrid, nefacs, facs(:,2))
          call factors (naky, nkyfacs, facs(:,3))
          call factors (ntheta0, nkxfacs, facs(:,4))
          call factors (nlambda, nlfacs, facs(:,5))
          !          faclin = 3.5*(real(nmesh))**1.1/1.e7/5.
          fac = 3.5*(real(nmesh))**1.1/1.e7
          do i=1,nspfacs
             npe = facs(i,1)
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 's')
          end do
          do i=2,nefacs
             npe = facs(i,2)*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'e')
          end do
          do i=2,nkyfacs
             npe = facs(i,3)*negrid*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'y')
          end do
          do i=2,nkxfacs
             npe = facs(i,4)*naky*negrid*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'x')
          end do
          do i=2,nlfacs
             npe = facs(i,5)*naky*ntheta0*negrid*nspec 
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'l')
          end do
          deallocate (facs)

       case ('yxels')

          allocate (facs(max(naky,ntheta0,nspec,negrid,nlambda,ntheta0*naky)/2+1,6))
          call factors (nspec, nspfacs, facs(:,1))
          call factors (nlambda, nlfacs, facs(:,2))
          call factors (negrid, nefacs, facs(:,3))
          call factors (ntheta0, nkxfacs, facs(:,4))
          call factors (naky, nkyfacs, facs(:,5))
          call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
          fac = 3.5*(real(nmesh))**1.1/1.e7
          do i=1,nspfacs
             npe = facs(i,1)
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 's')
          end do
          do i=2,nlfacs
             npe = facs(i,2)*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'l')
          end do
          do i=2,nefacs
             npe = facs(i,3)*nlambda*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'e')
          end do
          do i=2,nkxkyfacs
             npe = facs(i,6)*nlambda*negrid*nspec
             if (npe .gt. npmax) exit
             ! Check whether this process count would have been generated by the 
             ! plain x or y factorisation or if it is new from the combined x*y
             ! functionality             
             onlyxoryfac = .false.
             do j=2,nkxfacs
                checknpe = facs(j,4)*negrid*nlambda*nspec
                if (npe .eq. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             do j=2,nkyfacs
                checknpe = facs(j,5)*ntheta0*negrid*nlambda*nspec
                if (npe .gt. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             call report_idle_procs(npe, 'x*y', onlyxoryfac)
          end do
          !          do i=2,nkxfacs
          !             npe = facs(i,4)*negrid*nlambda*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'x')
          !          end do
          !          do i=2,nkyfacs
          !             npe = facs(i,5)*ntheta0*negrid*nlambda*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'y')
          !          end do
          deallocate (facs)

       case ('yxles')

          allocate (facs(max(ntheta0,naky,nspec,negrid,nlambda,ntheta0*naky)/2+1,6))
          call factors (nspec, nspfacs, facs(:,1))
          call factors (negrid, nefacs, facs(:,2))
          call factors (nlambda, nlfacs, facs(:,3))
          call factors (ntheta0, nkxfacs, facs(:,4))
          call factors (naky, nkyfacs, facs(:,5))
          call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
          fac = 3.5*(real(nmesh))**1.1/1.e7
          do i=1,nspfacs
             npe = facs(i,1)
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 's')
          end do
          do i=2,nefacs
             npe = facs(i,2)*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'e')
          end do
          do i=2,nlfacs
             npe = facs(i,3)*negrid*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'l')
          end do
          do i=2,nkxkyfacs
             npe = facs(i,6)*nlambda*negrid*nspec
             if (npe .gt. npmax) exit
             ! Check whether this process count would have been generated by the 
             ! plain x or y factorisation or if it is new from the combined x*y
             ! functionality  
             onlyxoryfac = .false.
             do j=2,nkxfacs
                checknpe = facs(j,4)*nlambda*negrid*nspec
                if (npe .eq. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             do j=2,nkyfacs
                checknpe = facs(j,5)*ntheta0*nlambda*negrid*nspec
                if (npe .eq. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             call report_idle_procs(npe, 'x*y', onlyxoryfac)
          end do
          !          do i=2,nkxfacs
          !             npe = facs(i,4)*nlambda*negrid*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'x')
          !          end do
          !          do i=2,nkyfacs
          !             npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'y')
          !          end do
          deallocate (facs)

       case ('xyles')
          !CMR, 11/11/2009: add processor recommendations for xyles layout
          !            NB added recommendations that also parallelise in y and x, 
          !                          which may be unwise!

          allocate (facs(max(nspec,negrid,nlambda,naky,ntheta0,ntheta0*naky)/2+1,6))
          call factors (nspec, nspfacs, facs(:,1))
          call factors (negrid, nefacs, facs(:,2))
          call factors (nlambda, nlfacs, facs(:,3))
          call factors (naky, nkyfacs, facs(:,4))
          call factors (ntheta0, nkxfacs, facs(:,5))
          call factors (naky*ntheta0, nkxkyfacs, facs(:,6))
          fac = 3.5*(real(nmesh))**1.1/1.e7
          do i=1,nspfacs
             npe = facs(i,1)
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 's')
          end do
          do i=2,nefacs
             npe = facs(i,2)*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'e')
          end do
          do i=2,nlfacs
             npe = facs(i,3)*negrid*nspec
             if (npe .gt. npmax) exit
             call report_idle_procs(npe, 'l')
          end do
          do i=2,nkxkyfacs
             npe = facs(i,6)*nlambda*negrid*nspec
             if (npe .gt. npmax) exit
             ! Check whether this process count would have been generated by the 
             ! plain x or y factorisation or if it is new from the combined x*y
             ! functionality  
             onlyxoryfac = .false.
             do j=2,nkyfacs
                checknpe = facs(j,4)*nlambda*negrid*nspec
                if (npe .eq. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             do j=2,nkxfacs
                checknpe = facs(j,5)*naky*nlambda*negrid*nspec
                if (npe .eq. checknpe) then
                   onlyxoryfac = .true.
                   exit
                end if
             end do
             call report_idle_procs(npe, 'x*y', onlyxoryfac)
          end do
          !          do i=2,nkyfacs
          !             npe = facs(i,4)*nlambda*negrid*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'y')
          !          end do
          !          do i=2,nkxfacs
          !             npe = facs(i,5)*naky*nlambda*negrid*nspec
          !             if (npe .gt. npmax) exit
          !             call report_idle_procs(npe, 'x')
          !          end do
          deallocate (facs)

       end select

       write (report_unit, fmt="('----------------------------------------------------------------------------------------------------------------------------------------------')")
       write (report_unit, *)
       write (report_unit, fmt="('(*) denotes process counts that are from factors of the combined kx*ky index rather than the ordered kx or ky indices separately.')")
       write (report_unit, fmt="('To use the unbalanced functionality set unbalanced_xxf = .true. or unbalanced_yxf = .true. in the &layouts_knobs namelist in your GS2 ')")
       write (report_unit, fmt="('input file. You can also set the max_unbalanced_xxf and max_unbalanced_yxf flags in the same namelist in the input file to specify the')")
       write (report_unit, fmt="('maximum amount of computational imbalance allowed. These flags specify the maximum imbalance as 1 with no imbalance as 0, so to allow')")
       write (report_unit, fmt="('50% imbalanced on the xxf decomposition using the following flags in the input file: ')")
       write (report_unit, fmt="('                                                                                     unbalanced_xxf = .true. ')")
       write (report_unit, fmt="('                                                                                     max_unbalanced_xxf = 0.5 ')")
       write (report_unit, fmt="('And likewise for yxf.')")

    else
       write (report_unit, *) 
       write (report_unit, fmt="('Recommended numbers of processors:')")
       select case (gridopt_switch)

       case (gridopt_single)

          select case (layout)
          case ('lexys', 'yxles', 'lxyes', 'xyles')
             allocate (facs(max(negrid,nspec,nlambda)/2+1,3))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (negrid, nefacs, facs(:,2))
             call factors (nlambda, nlfacs, facs(:,3))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nlfacs
                npe = facs(i,3)*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)

          case ('yxels')
             allocate (facs(max(negrid,nspec,nlambda)/2+1,3))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (nlambda, nlfacs, facs(:,2))
             call factors (negrid, nefacs, facs(:,3))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nlfacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs
                npe = facs(i,3)*nlambda*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)
          end select

       case (gridopt_range,gridopt_specified,gridopt_box)

          select case (layout)
          case ('lexys')

             allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (naky, nkyfacs, facs(:,2))
             call factors (ntheta0, nkxfacs, facs(:,3))
             call factors (negrid, nefacs, facs(:,4))
             call factors (nlambda, nlfacs, facs(:,5))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkyfacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkxfacs-1
                npe = facs(i,3)*naky*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,4)*ntheta0*naky*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nlfacs
                npe = facs(i,5)*negrid*ntheta0*naky*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)

          case ('lxyes')

             write (report_unit, *) 
             allocate (facs(max(nspec,negrid,naky,ntheta0)/2+1,4))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (negrid, nefacs, facs(:,2))
             call factors (naky, nkyfacs, facs(:,3))
             call factors (ntheta0, nkxfacs, facs(:,4))
             !             fac = 3.5*(real(nmesh))**1.1/1.e7/5.  ! okay for large runs, not small
             do i=1,nspfacs-1
                npe = facs(i,1)
                !                if (nmesh/npe > ncut) write &
                !                     & (report_unit, fmt="('  npe = ',i4,'    time = ',f8.2,'  seconds/time step')") &
                !                     & npe, fac/npe**0.95
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkyfacs-1
                npe = facs(i,3)*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkxfacs
                npe = facs(i,4)*naky*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)

          case ('yxles')

             allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (negrid, nefacs, facs(:,2))
             call factors (nlambda, nlfacs, facs(:,3))
             call factors (ntheta0, nkxfacs, facs(:,4))
             call factors (naky, nkyfacs, facs(:,5))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nlfacs-1
                npe = facs(i,3)*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkxfacs-1
                npe = facs(i,4)*nlambda*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkyfacs
                npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)

          case ('xyles')

             allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (negrid, nefacs, facs(:,2))
             call factors (nlambda, nlfacs, facs(:,3))
             call factors (naky, nkyfacs, facs(:,4))
             call factors (ntheta0, nkxfacs, facs(:,5))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i8)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i8)") npe
             end do
             do i=1,nlfacs-1
                npe = facs(i,3)*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i8)") npe
             end do
             do i=1,nkyfacs-1
                npe = facs(i,4)*nlambda*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i8)") npe
             end do
             do i=1,nkxfacs
                npe = facs(i,5)*ntheta0*nlambda*negrid*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i8)") npe
             end do
             deallocate (facs)

          case ('yxels')

             allocate (facs(max(nspec,naky,ntheta0,negrid,nlambda)/2+1,5))
             call factors (nspec, nspfacs, facs(:,1))
             call factors (nlambda, nlfacs, facs(:,2))
             call factors (negrid, nefacs, facs(:,3))
             call factors (ntheta0, nkxfacs, facs(:,4))
             call factors (naky, nkyfacs, facs(:,5))
             do i=1,nspfacs-1
                npe = facs(i,1)
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nlfacs-1
                npe = facs(i,2)*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nefacs-1
                npe = facs(i,3)*nlambda*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkxfacs-1
                npe = facs(i,4)*negrid*nlambda*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             do i=1,nkyfacs
                npe = facs(i,5)*ntheta0*negrid*nlambda*nspec
                if (nmesh/npe > ncut) write (report_unit, fmt="('  npe = ',i4)") npe
             end do
             deallocate (facs)

          end select
       end select
    end if

  end subroutine nprocs