!> FIXME : Add documentation module job_manage implicit none private public :: timer_local, time_message, init_checktime, checktime, checkstop, njobs, job_fork integer :: njobs logical, parameter :: debug = .false. real :: wall_clock_initial_time = 0. logical :: checktime_initialized = .false. contains !> Returns CPU time in seconds. Currently just a thin wrapper to !> method of the same name in mp. In the future we probably want to !> remove the method here and change the relevant use statements. real function timer_local() use mp, only: timer_local_mp => timer_local implicit none timer_local = timer_local_mp() end function timer_local !> This routine counts elapsed time between two calls. Currently !> just a thin wrapper to method of the same name in mp. In the !> future we probably want to remove the method here and change the !> relevant use statements. subroutine time_message(lprint,targ,chmessage) use mp, only: time_message_mp => time_message implicit none character (len=*), intent(in) :: chmessage logical, intent(in) :: lprint real, intent(in out) :: targ(2) ! tsum and told call time_message_mp(lprint, targ, chmessage) end subroutine time_message !> FIXME : Add documentation subroutine job_fork (n_ensembles) use file_utils, only: list_name, run_name, init_job_name use file_utils, only: init_error_unit, init_input_unit, list_name use file_utils, only: futils_initialized => initialized use constants, only: run_name_size use mp, only: job, proc0, nproc, init_jobs, broadcast, finish_mp implicit none integer, intent (in), optional :: n_ensembles integer, dimension(:), allocatable :: group0 integer :: i, l, list_unit, ierr character(run_name_size), dimension(:), allocatable :: job_list logical :: list_mode list_mode = .not. present(n_ensembles) ! open file containing list of input files to run and read total ! number of input files from first line if (list_mode) then if (proc0) then open (newunit = list_unit, file = trim(list_name)) read (list_unit, *) njobs end if else njobs = n_ensembles end if call broadcast (njobs) if (nproc < njobs) then if (proc0) then write (*,*) write (*,*) 'Number of jobs = ',njobs,' and number of processors = ',nproc write (*,*) 'Number of processors must not be less than the number of jobs' write (*,*) 'Stopping' write (*,*) end if call finish_mp !Ok as all procs call this routine stop end if if (mod(nproc, njobs) /= 0) then if (proc0) then write (*,*) write (*,*) 'Number of jobs = ',njobs,' and number of processors = ',nproc write (*,*) 'Number of jobs must evenly divide the number of processors.' write (*,*) 'Stopping' write (*,*) end if call finish_mp !Ok as all procs call this routine stop end if allocate (job_list(0:njobs-1)) if (proc0) then if (list_mode) then do i = 0, njobs - 1 read (list_unit, fmt="(a)") job_list(i) end do close (list_unit) else l = len_trim(list_name) do i = 0, njobs - 1 write(job_list(i),'(A,"_",I0)') list_name(1:l-3), i + 1 end do end if end if call broadcast(job_list) allocate (group0(0:njobs-1)) call init_jobs (njobs, group0, ierr) call init_job_name (job_list(job)) if (proc0) then call init_error_unit(.true.) call init_input_unit(.true.) end if if (nproc > 1 .and. proc0) & & write(*,*) 'Job ',job,' is called ',trim(run_name),& & ' and is running on ',nproc,' processors' if (nproc == 1) write(*,*) 'Job ',job,' is called ',trim(run_name),& & ' and is running on ',nproc,' processor' deallocate (group0, job_list) !> We need to set this for the group proc0, as it is previously !> only set for the global proc0 if (proc0) futils_initialized = .true. end subroutine job_fork !> Check whether the stop file .stop is present, and if so trigger exit. subroutine checkstop(exit, exit_reason, list) use mp, only: proc0, broadcast use file_utils, only: run_name, list_name use constants, only: run_name_size use exit_codes, only: EXIT_STOP_FILE, exit_code use optionals, only: get_option_with_default implicit none logical, intent (in), optional :: list logical, intent (in out) :: exit type(exit_code), intent (in out) :: exit_reason character(run_name_size) :: filename logical :: exit_local, is_list ! If .stop file has appeared, set exit flag if (proc0) then is_list = get_option_with_default(list, .false.) if (is_list) then filename = list_name(:len_trim(list_name)-5)//".stop" else filename = trim(run_name)//".stop" end if inquire(file = filename, exist = exit_local) if (exit_local) then exit_reason = EXIT_STOP_FILE call exit_reason%write_exit_file() end if exit = exit .or. exit_local end if call broadcast (exit) end subroutine checkstop !> FIXME : Add documentation subroutine init_checktime if (checktime_initialized) return wall_clock_initial_time = timer_local() checktime_initialized = .true. end subroutine init_checktime !> Check whether elapsed run time is within a 5 minutes of exceeding the available CPU !> time, and if so trigger the code exit. subroutine checktime(avail_time, exit, margin_in) use mp, only: proc0, broadcast use file_utils, only: error_unit use exit_codes, only: EXIT_OUT_OF_TIME use optionals, only: get_option_with_default implicit none ! available time in second real, intent(in) :: avail_time ! margin real, intent(in), optional :: margin_in ! true if elapse time exceed available time logical, intent(in out) :: exit real :: elapse_time real, save :: margin = 300. ! 5 minutes margin = get_option_with_default(margin_in, margin) if (.not. checktime_initialized) then call init_checktime else elapse_time = timer_local() - wall_clock_initial_time if (proc0) then if (elapse_time >= avail_time - margin) then write(error_unit(),'(a,f12.4,a,f12.4)') & & 'Elapse time ',elapse_time, & & ' exceeds available time',avail_time-margin write(error_unit(),'(a,f12.4,a,f12.4,a)') & & ' (Given CPU time: ',avail_time, & & ' Margin: ',margin,')' exit = .true. call EXIT_OUT_OF_TIME%write_exit_file() end if end if end if call broadcast(exit) end subroutine checktime end module job_manage