*! dobatch 1.0 4mar2025 by Julian Reif program define dobatch, rclass * Optional params for dobatch are specified in the following globals: * DOBATCH_DISABLE * DOBATCH_MIN_CPUS_AVAILABLE * DOBATCH_MAX_STATA_JOBS * DOBATCH_WAIT_TIME_MINS version 13.0 * If dobatch is disabled, just run the dofile as normal if `"$DOBATCH_DISABLE"'=="1" { do `0' exit } * dobatch requires unix-based systems and Stata MP cap assert c(os)=="Unix" if _rc { noi di as error "dobatch requires Linux or MacOSX terminal OS" exit 198 } cap assert c(MP)==1 if _rc { noi di as error "dobatch requires Stata MP" exit 198 } * First argument must be dofilename, followed by optional arguments to the dofile syntax anything [, nostop] gettoken dofile args : anything cap confirm file "`dofile'" if _rc cap confirm file "`dofile'.do" if _rc confirm file "`dofile'" * Set default values for how many CPUs need to be available and for max number of active Stata jobs * (1) MIN_CPUS_AVAILABLE = (# Stata-MP license cores) - 1 * (2) MAX_STATA_JOBS = (# cores) / (# Stata-MP license cores). If <2, set to 2. * Note: c(processors_mach) evaluates to missing when running non-MP Stata. local num_cpus_machine = c(processors_mach) local num_cpus_statamp = c(processors_lic) local default_min_cpus_available = max(`num_cpus_statamp' - 1,1) local default_max_stata_jobs = max(floor(`num_cpus_machine' / `num_cpus_statamp'), 2) local MIN_CPUS_AVAILABLE = `default_min_cpus_available' local MAX_STATA_JOBS = `default_max_stata_jobs' * Default wait time is 5 minutes local WAIT_TIME_MINS = 5 * The default values above can be overriden by user-defined global macros foreach param in MIN_CPUS_AVAILABLE MAX_STATA_JOBS WAIT_TIME_MINS { if !mi(`"${DOBATCH_`param'}"') { cap confirm number ${DOBATCH_`param'} if _rc { noi di as error _n "Error parsing the global variable DOBATCH_`param'" confirm number ${DOBATCH_`param'} } local `param' = ${DOBATCH_`param'} if "`param'"=="WAIT_TIME_MINS" noi di as text "Wait time set to " as result "`WAIT_TIME_MINS'" as text " minutes" } } if `MAX_STATA_JOBS' < 1 { noi di as error "DOBATCH_MAX_STATA_JOBS must be at least 1" exit 198 } noi di as text _n "Minimum required available CPUs: " as result `MIN_CPUS_AVAILABLE' noi di as text "Maximum number of background Stata jobs allowed: " as result `MAX_STATA_JOBS' tempname fh tempfile tmp ************************************************ * Confirm that stata-mp is an installed application ************************************************ cap rm `tmp' qui shell sh -c 'command -v stata-mp >/dev/null && touch `tmp'' cap confirm file `tmp' if _rc { di as error "stata-mp not found. Ensure Stata is installed and accessible from your system's PATH." di as error "Try running 'which stata-mp' or 'echo \$PATH' in the terminal to debug." exit 601 } ************************************************ * Check server usage ************************************************ * If check_cpus=1, wait until there are (1) at least MIN_CPUS_AVAILABLE CPUs available; and (2) less than MAX_STATA_JOBS active Stata processes * - If wait time is non-positive, skip this code (ie, set check_cpus = 0) local check_cpus 1 if `WAIT_TIME_MINS'<=0 local check_cpus = 0 while (`check_cpus'==1) { cap rm `tmp' qui shell sh -c 'LANG=C uptime | sed -E "s/.*load average[s]?: //" | tr -s " ," "," | cut -d"," -f1' > `tmp' file open `fh' using `tmp', read file read `fh' line file close `fh' local one_min_load_avg = trim("`line'") cap confirm number `one_min_load_avg' if _rc { di as error "Error parsing the load average:" di as error `"shell sh -c 'LANG=C uptime | sed -E "s/.*load average[s]?: //" | tr -s " ," "," | cut -d"," -f1'"' confirm number `one_min_load_avg' } local free_cpus = `num_cpus_machine' - `one_min_load_avg' noi di _n "Available CPUs at $S_TIME: `free_cpus'" * Count number of background stata-mp processes. Subtract one to exclude the parent process (this script). cap rm `tmp' qui shell ps aux | grep '[s]tata-mp' | wc -l > `tmp' file open `fh' using `tmp', read file read `fh' line file close `fh' local num_stata_jobs = trim("`line'") cap confirm integer number `num_stata_jobs' if _rc { di as error "Error counting the number of background Stata processes:" di as error `"shell ps aux | grep '[s]tata-mp' | wc -l"' confirm integer number `num_stata_jobs' } local num_stata_jobs = `num_stata_jobs'-1 noi di "Background Stata MP jobs at $S_TIME: `num_stata_jobs'" * If server is busy, wait a few minutes and try again if `free_cpus' < `MIN_CPUS_AVAILABLE' | `num_stata_jobs' >= `MAX_STATA_JOBS' { noi di "Waiting for at least `MIN_CPUS_AVAILABLE' available CPUs and fewer than `MAX_STATA_JOBS' background Stata MP jobs..." sleep `=1000*60*`WAIT_TIME_MINS'' } else local check_cpus = 0 } ************************************************ * Run Stata MP in Unix batch mode ************************************************ tempname stata_pid_fh tempfile stata_pid_file local prefix "nohup stata-mp -b do" local suffix "</dev/null >/dev/null 2>&1 & echo $! > `stata_pid_file'" if !mi("`stop'") local stop ", `stop' " noi di _n `"sh -c '`prefix' \"`dofile'\" `args' `stop'`suffix''"' shell sh -c '`prefix' \"`dofile'\" `args' `stop'`suffix'' * Store the process ID number file open `stata_pid_fh' using `"`stata_pid_file'"', read file read `stata_pid_fh' stata_pid file close `stata_pid_fh' local stata_pid = trim(`"`stata_pid'"') cap confirm number `stata_pid' if _rc local stata_pid = . if !mi(`stata_pid') global DOBATCH_STATA_PID "$DOBATCH_STATA_PID `stata_pid'" global DOBATCH_STATA_PID = trim("$DOBATCH_STATA_PID") * Return parameter values return scalar PID = `stata_pid' return scalar MIN_CPUS_AVAILABLE = `MIN_CPUS_AVAILABLE' return scalar MAX_STATA_JOBS = `MAX_STATA_JOBS' return scalar WAIT_TIME_MINS = `WAIT_TIME_MINS' end ** EOF