*! version 4.0 20250729 - DIME Analytics & LSMS Team, The World Bank - dimeanalytics@worldbank.org, lsms@worldbank.org

/***************************************************************************
  repscan: scans a do-file line by line and prints one message for each
  line that matches a known reproducibility issue.

  Syntax:  repscan do_file_path [, complete]

    do_file_path  path of the do-file to scan (quoted or unquoted)
    complete      also run the non-critical checks (sort, sortseed, bysort,
                  reclink, and set seed before version)

  Lines ending in "REPSCAN OK" are skipped by every check.
***************************************************************************/
program define repscan

    version 14.1

    syntax anything, ///
        [            ///
        complete     ///
        ]

    // Take the do-file path from what -syntax- parsed. Re-tokenizing `0'
    // with -args- would glue the option comma to an unquoted path
    // (repscan myfile.do, complete  ->  "myfile.do,"). Assigning without
    // compound quotes strips one level of surrounding quotes, if present.
    local do_file `anything'

    // complete scan of issues or basic scan
    if !missing("`complete'") {
        local complete = 1
    }
    else {
        local complete = 0
    }

    // reading file
    cap file close _myfile
    file open _myfile using "`do_file'", read
    display("Scanning do-file `do_file':")
    file read _myfile _line
    // Keep the end-of-file flag in a local: the rclass helpers called in
    // the loop overwrite r(), so r(eof) is only valid right after -file read-
    local eof = r(eof)

    // defining locals for line counter and multiline checks
    local n_line      = 1
    local setseed     = 0
    local set_version = 0

    // Outputs table. The header rows are positioned with _col() relative to
    // the border width so the closing bar always lines up with the border.
    local rule "________________________________________________________________"
    local inner  = strlen("`rule'")
    local rightc = `inner' + 2
    local titlec = int((`inner' - 5) / 2) + 2
    display as result " `rule'"
    display as result "|" _col(`rightc') "|"
    display as result "|" _col(`titlec') "Issue" _col(`rightc') "|"
    display as result "|`rule'|"
    display as result " "

    // iterating through lines
    while `eof' == 0 {

        // checking if line has "REPSCAN OK"; flagged lines skip all checks
        _check_repscan_ok `"`macval(_line)'"'
        if `r(_repscan_ok)' != 1 {

            // 1 - Critical checks are always performed

            // checking single-line reproducibility issues
            _check_merge_mm       `"`macval(_line)'"' `n_line'
            _check_dup_drop_force `"`macval(_line)'"' `n_line'

            // detection for multi-line issues: setseed
            // (once a seed has been found the flag stays at 1)
            if `setseed' == 0 {
                _check_setseed `"`macval(_line)'"'
                local setseed = `r(_setseed)'
            }

            // checking multiline issue: runiform without setseed
            if `setseed' == 0 {
                _check_runiform `"`macval(_line)'"' `n_line'
            }

            // 2 - Other checks are only performed in complete mode
            if `complete' == 1 {

                // checking single-line reproducibility issues
                _check_sort     `"`macval(_line)'"' `n_line'
                _check_sortseed `"`macval(_line)'"' `n_line'
                _check_bysort   `"`macval(_line)'"' `n_line'
                _check_reclink  `"`macval(_line)'"' `n_line'

                // detection for multi-line issues: version
                // (once a version has been found the flag stays at 1)
                if `set_version' == 0 {
                    _check_version `"`macval(_line)'"'
                    local set_version = `r(_set_version)'
                }

                // checking multiline issue: setseed without version
                if `set_version' == 0 {
                    _check_setseed_as_issue `"`macval(_line)'"' `n_line'
                }
            }
        }

        // increment in line counter and update content
        local n_line = `n_line' + 1
        file read _myfile _line
        local eof = r(eof)
    }

    file close _myfile

    display as result "__________________________________________________________________"
    display as result "See repscan's help article for an explanation of each issue."

end

/***************************************************************************
****************************************************************************

  Auxiliary functions

  Every helper receives the text of one do-file line as its first argument.
  Helpers that print a message also receive the line number.

****************************************************************************
***************************************************************************/

/*************************************************************************
  check_version: detects version is set
  Note that it doesn't print a detection message but returns a scalar:
  r(_set_version) is 1 when the line sets the version, 0 otherwise
*************************************************************************/
program define _check_version, rclass

    // Take the line content as the argument
    args mystring

    // Check if "version XX" is present
    local regx "^\s*version +\d{1,2}"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        return scalar _set_version = 1
    }
    else {
        return scalar _set_version = 0
    }

end

/*************************************************************************
  check_repscan_ok: detects "REPSCAN OK" at the end of a line
  Note that it doesn't print a detection message but returns a scalar:
  r(_repscan_ok) is 1 when the marker is present, 0 otherwise
*************************************************************************/
program define _check_repscan_ok, rclass

    // Take the line content as the argument
    args mystring

    // Check if "REPSCAN OK" is present ("OK" in any letter case,
    // optionally followed by trailing blanks)
    local regx "REPSCAN +(?:O|o)(?:K|k) *$"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        return scalar _repscan_ok = 1
    }
    else {
        return scalar _repscan_ok = 0
    }

end

/*************************************************************************
  check_runiform: detects the use of runiform
*************************************************************************/
program define _check_runiform

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if "= runiform(" is present; blanks after the equal sign are
    // optional so that "x=runiform()" is detected as well
    local regx "= *runiform\("
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': using runiform() without setting a random seed first"'
    }

end

/*************************************************************************
  check_setseed: detects the use of set seed.
  Note that it doesn't print a detection message but returns a scalar:
  r(_setseed) is 1 when the line sets a numeric seed, 0 otherwise
*************************************************************************/
program define _check_setseed, rclass

    // Take the line content as the argument
    args mystring

    // Check if "set seed" followed by a number is present
    local regx "^\s*set +seed +\d+"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        return scalar _setseed = 1
    }
    else {
        return scalar _setseed = 0
    }

end

/*************************************************************************
  check_setseed_as_issue: also detects the use of set seed.
  Note that this function prints the result as a flagged issue instead
  of returning a scalar
*************************************************************************/
program define _check_setseed_as_issue

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if "set seed" followed by a number is present
    local regx "^\s*set +seed +\d+"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': setting a random seed without setting the version first"'
    }

end

/*************************************************************************
  check_merge_mm: detects the use of a many-to-many merge
*************************************************************************/
program define _check_merge_mm

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line starts with "merge m:m"
    local regx "^\s*merge +m:m"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': Using many-to-many merge"'
    }

end

/*************************************************************************
  check_dup_drop_force: detects the use of a forced drop of duplicates
*************************************************************************/
program define _check_dup_drop_force

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line is a forced drop of duplicates with the syntax:
    //   duplicates drop *, force
    // Blanks between the comma and "force" are optional
    local regx "^\s*duplicates +drop[^,]*, *force"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': forced drop of duplicates"'
    }

end

/*************************************************************************
  check_sort: detects the use of a sort
*************************************************************************/
program define _check_sort

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line is sorting
    local regx "^\s*sort +"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': using sort"'
    }

end

/*************************************************************************
  check_sortseed: detects the use of a sortseed
*************************************************************************/
program define _check_sortseed

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line is a "set sortseed" or "set sortrngstate"
    local regx "^\s*set +sort(seed|rngstate)"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': using sortseed"'
    }

end

/*************************************************************************
  check_bysort: detects the use of a bysort
*************************************************************************/
program define _check_bysort

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line is a bysort: starts with "bys", followed by at
    // least two non-colon characters and then a colon
    local regx "^\s*bys[^:]{2,}:"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': using bysort"'
    }

end

/*************************************************************************
  check_reclink: detects the use of reclink
*************************************************************************/
program define _check_reclink

    // Take the line content and the line number as arguments
    args mystring n_line

    // Check if the line uses reclink
    local regx "^\s*reclink"
    if ustrregexm(`"`macval(mystring)'"', "`regx'") {
        display as result `" Line `n_line': using reclink"'
    }

end