// This program renames and labels variables using an external data sets. capture program drop renamefrom program define renamefrom, nclass syntax using/, filetype(string) raw(string) clean(string) /// [delimiters(string)] [sheet(string)] /// [if(string)] /// [label(string)] /// [keepx] [dropx] /// [keeplabel] [namelabel] /// [CASEignore] /// version 8.0 // declare error codes local syntaxError 198 local otherError 102 // display process comment display "" display "renaming variables from workbook `using'" display "" // preserve the existing data preserve ************************************************************************************** *** DEFINE MAPPING FROM RAW TO CLEAN VARIABLE NAMES *** // sheet can be specified only if using excel // delimiters can be specified only if using delimited file if ("`sheet'" != "") & ("`filetype'" != "excel") { display as error "Cannot specify sheet unless using excel filetype" exit `syntaxError' } if ("`delimiters'" != "") & ("`filetype'" != "delimited") { display as error "Cannot specify delimiters unless using delimited filetype" exit `syntaxError' } // get variable names if "`filetype'" == "excel" { import excel `"`using'"', sheet(`sheet') firstrow clear } else if "`filetype'" == "delimited" { insheet using `"`using'"', delimiter("`delimiters'") names case clear } else if "`filetype'" == "stata" { use `"`using'"', clear } else { display as error "`filetype' is not a valid filetype" exit `syntaxError' } quietly keep if !missing(`raw') & !missing(`clean') // restrict based on "if" option if `"`if'"' != "" { //" keep if `if' } // if caseignore option, rename all "old" vars to lowercase // keep a copy of "old" variables with original capitalization if needed later for variable labels tempvar raw_preserved gen `raw_preserved' = `raw' if "`caseignore'" == "caseignore" { replace `raw' = lower(`raw') } // verify that old and new variable names are unique foreach name in raw clean { tempvar N bysort ``name'': gen `N' = _N cap assert (`N' == 1) if _rc { display as error `"The function call was "renamefrom `0'" "' //" display as error "The `name' column contains duplicate variable names. Please specify a 1:1 mapping from old names to new names." list ``name'' if (`N' > 1) exit _rc } } // store the variable names and labels as locals local n_vars = _N forvalues i = 1/`n_vars' { local new_`i' = `clean'[`i'] local old_`i' = `raw'[`i'] local old_preserved_`i' = `raw_preserved'[`i'] if "`label'" != "" { local label_`i' = `label'[`i'] } } ************************************************************************************** *** RENAME VARIABLES *** // restore the master data restore //if caseignore option on, convert var to lower if "`caseignore'" == "caseignore" { // keep whole variable list to later keep capitalization of vars that were not renamed qui ds local old_vars `r(varlist)' foreach var of varlist _all { local lower = lower("`var'") if "`var'" != "`lower'" { qui rename `var' `lower' } } } // assert that all old variables in excel column are actually in dataset. forvalues i = 1/`n_vars' { capture confirm variable `old_`i'' if _rc { local missing_list `missing_list' `old_`i'' } } // print missing variables if "`missing_list'" != "" & "`dropx'" == "" { display as error `"The function call was "renamefrom `0'" "' //" display as error "The raw column contains the following variables not found in the master dataset." display "" display "`missing_list'" exit `otherError' } // order variables so that the ones you want to keep are first forvalues i = `n_vars'(-1)1 { order `old_`i'' } // rename vars, drop if indicated, and collect leftover vars otherwise local i = 1 local n_extra = 0 foreach v of varlist _all { if `i' <= `n_vars' { tempvar `new_`i'' if "`v'" != "``new_`i'''" { qui rename `v' ``new_`i''' } } else { if "`dropx'" == "dropx" { drop `v' } else { local extras `extras' `v' local ++n_extra } } local ++i } // this loop and the one above it have to happen separately because // tempvars and variable names have different caps on character length local i = 1 foreach v of varlist _all { if `i' <= `n_vars' { if "``new_`i'''" != "`new_`i''" { qui rename ``new_`i''' `new_`i'' } local ++i } } // revert to non-lowercased names for variables that were not renamed if (`n_extra' > 0) & ("`caseignore'" == "caseignore") { foreach extra_var of local extras { foreach old_var of local old_vars { if lower("`old_var'") == "`extra_var'" { local extras_cap `extras_cap' `old_var' if "`old_var'" != "`extra_var'" { rename `extra_var' `old_var' } break } } } } // label variables forvalues i = 1/`n_vars' { if "`label_`i''" != "" { // use Excel value if provided label var `new_`i'' "`label_`i''" } else if ("`keeplabel'" == "") { // If no Excel value, and keep label is not specified, label var `new_`i'' "" // overwrite existing label. } local varlab : variable label `new_`i'' // If label is left missing, if ("`varlab'" == "") & ("`namelabel'" != "") { // and name label option is specified, label var `new_`i'' "`old_preserved_`i''" // apply old name as new label. } } // list variables you haven't renamed yet if ("`keepx'" != "keepx") & (`n_extra' > 0) { display as error "The following variables were not assigned a clean name. Please provide a name for each in the supporting spreadsheet, or specify option 'keepx'" describe `extras_cap' exit `otherError' } end ************************************************************************************** ************************************************************************************** **************************************************************************************