*! v1 by Ryan Knight 18feb2011
*
* mergeall -- merge every matching file in a directory into one dataset,
* keyed on an identifier variable, flagging observations whose values
* disagree between files.
*
* Syntax:
*   mergeall idvar using directory [, options]
*
* Options:
*   strings(namelist)  variables that must be string; every other string
*                      variable is destringed (requires the force option)
*   force              required together with strings()
*   do(string)         do-file run on each file right after it is loaded;
*                      the current file name is available in global filename
*   txt csv dta        extension of the files to read (default pattern is *.csv);
*                      with dta the files are read by use, otherwise by insheet
*   tab comma double   passed through to insheet
*   showsource         keep _source (file an observation first came from) and
*                      _dissource (file that caused a disagreement)
*   format(string)     format handed to tostring (default %37.0g)
*   pattern(string)    file name pattern; overrides the extension options
*
* The command must be started with an empty dataset in memory.
* Variable _disagreement is kept (0/1) only if at least one conflict occurred.
prog def mergeall
    version 10.1
    syntax namelist using/ ///
        [, strings(namelist) force do(string) ///
        double txt csv dta tab comma ///
        showsource format(string) pattern(string)]

    quietly {

        * ---- Validate the starting state and the options ----
        cap assert _N == 0
        if _rc {
            noisily di as err "you must start with an empty dataset"
            exit 18
        }

        if "`strings'" != "" & "`force'" == "" {
            noisily di _newline as err "force option must be specified with strings option"
            exit 198
        }

        local id `namelist'
        if "`id'" == "" {
            noisily di as err "unique identifier must be specified"
            exit 198
        }

        * Wrap the display format into a ready-made tostring option
        if "`format'" != "" {
            local format format( `format' )
        }
        else {
            local format format( %37.0g )
        }

        * ---- Get the list of files to combine ----
        * An explicit pattern() wins; otherwise build one from the extension
        * option, falling back to csv when none was given.
        local ext `txt'`csv'`dta'
        if "`pattern'" == "" {
            if "`ext'" == "" {
                local pattern *.csv
            }
            else {
                local pattern *.`ext'
            }
        }

        local files: dir `"`using'"' files `"`pattern'"', respectcase

        * ---- Generate an empty dataset to merge into ----
        clear
        gen `id'=.
        gen _disagreement = .
        if "`showsource'" != "" {
            gen _source = ""
            gen _dissource = ""
        }

        tempfile all thisfile
        save `all', replace

        * ---- Loop through the files, merging each into the accumulated file ----
        noisily di as txt "Merging files:"
        local i = 0
        foreach ifile in `files' {

            * Save file name in a global so it can be accessed by the
            * cleaning do-file if necessary
            global filename `ifile'
            noisily di as res "$filename"

            if "`ext'" == "dta" {
                use `"`using'/`ifile'"' , clear
            }
            else {
                insheet using `"`using'/`ifile'"', clear `comma' `tab' `double'
            }

            if "`do'" != "" {
                do `"`do'"'
            }

            * Observations without an identifier cannot be matched
            drop if `id' ==.

            * The identifier must be unique within each file; show offenders.
            * noisily is needed because we are inside a quietly block and the
            * listing would otherwise be suppressed.
            cap isid `id'
            if _rc {
                duplicates tag `id' , gen(_iddup)
                noisily di as err "`id' does not uniquely identify the following observations in $filename"
                noisily list `id' if _iddup
                exit 459
            }

            if "`strings'" != "" {
                * User dictated the types: anything string that is not listed
                * in strings() is destringed, the listed ones become string.
                ds , has(type string)
                local isstring `r(varlist)'
                local destringers: list isstring - strings
                if "`destringers'" != "" {
                    destring `destringers', replace force
                }

                tostring `strings', replace force `format'
                * Verify the variables we just converted really are string
                cap confirm string variable `strings'
                if _rc {
                    exiterr
                }

                * Here the current file is the master and the accumulated
                * data is the using file.
                merge `id' using `all', sort update

                if "`showsource'" != "" {
                    * _merge 1 = only in master, i.e. new from this file
                    replace _source = "$filename" if _merge == 1
                    replace _dissource = "$filename" if _merge == 5
                }
            }
            else {
                * Set every variable that has a string in any file to string.
                * stranywhere accumulates across iterations.
                ds , has(type numeric)
                local numhere `r(varlist)'

                ds , has(type string)
                local strhere `r(varlist)'

                local stranywhere: list stranywhere | strhere
                local notstring: list stranywhere & numhere

                if "`notstring'" != "" {
                    tostring `notstring', replace `format'
                    cap confirm string variable `notstring'
                    if _rc {
                        exiterr
                    }
                }

                * Merge datasets: accumulated data is the master here
                save `thisfile', replace
                use `all', clear

                * From the second file on, the accumulated data may hold
                * numeric variables that a later file showed to be string.
                local i = `i'+1
                if `i' > 1 {
                    ds , has(type numeric)
                    local numhere `r(varlist)'
                    local notstring: list stranywhere & numhere
                    if "`notstring'" != "" {
                        tostring `notstring', replace `format'
                        cap confirm string variable `notstring'
                        if _rc {
                            exiterr
                        }
                    }
                }

                merge `id' using `thisfile', sort update

                if "`showsource'" != "" {
                    * _merge 2 = only in using, i.e. new from this file
                    replace _source = "$filename" if _merge == 2
                    replace _dissource = "$filename" if _merge == 5
                }
            }

            * _merge 5 = nonmissing values conflict between the two sides
            replace _disagreement = 1 if _merge == 5
            cap drop _merge

            save `all', replace
        }

        * ---- Report: keep the disagreement flag only if it is informative ----
        count if _disagreement ==1
        if `r(N)' == 0 {
            drop _disagreement
            cap drop _dissource
        }
        else {
            replace _disagreement=0 if _disagreement==.
            noisily di as err "Note: Information may have been lost due to disagreement between datasets"
        }
    }
end

* exiterr -- abort with return code 198 when tostring could not convert a
* variable to string without losing information.
program def exiterr
    di as err "Data cannot be converted to string without loss of information." ///
        _newline "You need to specify an appropriate format using the format() option"
    exit 198
end