*! version 1.2.1 08feb2012 Daniel Klein

* todummy: create dummy (indicator) variables from numeric variables.
*
* Main entry point. Dummies are derived from one of
*   - values()   one dummy per value list (lists separated by a backslash)
*   - values() with percentile or cut: dummies for the ranges defined
*                by percentiles or cutpoints
*   - levels     one dummy per distinct level of each variable
*   - median, q  shorthands for the percentile lists 50 and 25 50 75
* New variables are named through stub(), generate(), prefix() and
* suffix(); replace overwrites the source variable instead.
* Options that are not declared below are handed to _oldsyntax, which
* translates the syntax of earlier versions.
pr todummy ,sclass
    vers 9.2

    syntax varlist(num) [if][in] ///
    [ , ///
        Values(str) Percentile Cut Levels MEDian q ///
        STUB(namelist) Generate(namelist) PREfix(name) SUFFix(str) ///
        REPLACE noNAMes ///
        Label(str asis) RLabel(str asis) noVARLabel ///
        Missing ///
        noSKip NOSKip(str) ///
        RO(str) noEXClude /// undocumented syntax from todummy1 follows
        LEQ * ///
    ]

    * mark sample
    marksample touse ,nov
    qui cou if `touse'
    if (r(N) == 0) err 2000

    * get undocumented options (old syntax)
    if ("`options'" != "") {
        _oldsyntax ,`options'
        if ("`s(_td_vk)'" == "k") loc levels levels
        else {
            loc values `s(_td_vk)'
            loc `s(_td_opt)' `s(_td_opt)'
        }
        sret loc _td_vk
        sret loc _td_opt
    }
    if ("`leq'" != "") {
        if ("`options'" == "") {
            di as txt "(note: you are using old {cmd:todummy} " ///
                "syntax; see {help todummy} for new syntax)"
        }
        if ("`ro'" != "") {
            if ("`ro'" != "<=") {
                di as err "option ro not allowed with leq"
                e 198
            }
        }
        else loc ro <=
    }

    * check options

    // values or keyword specified
    loc k = ("`levels'`median'`q'" != "")
    if ("`values'" == "") {
        if !(`k') {
            di as err "one of values or {it:keyword} must be specified"
            e 198
        }
        foreach opt in percentile cut {
            if ("``opt''" != "") {
                di as err "option `opt' only allowed with values"
                e 198
            }
        }
    }
    if ("`values'" != "") & (`k') {
        di as err "only one of values or {it:keyword} may be specified"
        e 198
    }
    if ("`percentile'" != "") & ("`cut'" != "") {
        di as err "only one of percentile or cut may be specified"
        e 198
    }
    if ("`median'" != "") & ("`q'" != "") {
        di as err "only one of median or q may be specified"
        e 198
    }

    // name options
    if ("`replace'" != "") {
        foreach opt in generate prefix suffix stub levels {
            if ("``opt''" != "") {
                di as err "option replace not allowed with `opt'"
                e 198
            }
        }
    }
    if ("`stub'" != "") {
        loc stubdup : list dups stub
        if ("`stubdup'" != "") {
            di as err "`stubdup' mentioned more than once"
            e 198
        }
        if ("`generate'" != "") {
            di as err "option stub not allowed with generate"
            e 198
        }
        if ("`names'" != "") {
            di as err "option stub not allowed with nonames"
            e 198
        }
    }
    if ("`generate'" != "") {
        loc gendup : list dups generate
        if ("`gendup'" != "") {
            di as err "`gendup' mentioned more than once"
            e 198
        }
    }
    if ("`names'" != "") & ("`levels'" == "") {
        di as err "option nonames only allowed with levels"
        e 198
    }

    // label options
    // rlabel is stored in label; oi records which counter (i = per
    // variable, o = overall) selects the word used as label
    if (`"`macval(rlabel)'"' != "") {
        if (`"`macval(label)'"' != "") {
            di as err "option label not allowed with rlabel"
            e 198
        }
        loc label `"`macval(rlabel)'"'
        loc oi i
    }
    if ("`varlabel'" != "") {
        if (`"`macval(label)'"' != "") {
            di as err "option novarlabel not allowed with [r]label"
            e 198
        }
    }

    // advanced options
    if ("`noskip'" != "") {
        if ("`noskip'" != "drop") & ("`noskip'" != "replace") {
            di as err "invalid option noskip"
            e 198
        }
        if ("`skip'" != "") {
            di as err " noskip not allowed with noskip(`noskip')"
            e 198
        }
        // both noskip(drop) and noskip(replace) drop existing variables
        loc skip drop
    }
    if ("`ro'" != "") {
        if ("`levels'" != "") | ///
        (("`percentile'`cut'`median'`q'" == "")) {
            di as err "option ro not allowed"
            e 198
        }
        if !inlist("`ro'", ">", "<", ">=", "<=", "==", "!=", "~=") {
            di as err "invalid option ro"
            e 198
        }
    }
    if ("`exclude'" != "") & ///
    ("`percentile'`median'`q'`levels'" == "") {
        di as err "option noexclude only allowed " ///
            "with percentile or levels"
        e 198
    }

    * check/set default
    // default: values() given without percentile or cut
    if ("`values'" != "") & ("`percentile'`cut'" == "") {
        loc default 1
    }
    else loc default 0

    * get values

    // settings
    if ("`median'" != "") | ("`q'" != "") {
        if ("`median'" != "") loc vlst1 50
        else loc vlst1 25 50 75
        loc nvlst 1
        loc eq1 0
        loc percentile percentile
        loc hasmiss1 0
    }
    if ("`values'" != "") {
        if ("`percentile'" != "") {
            loc opt r(>0 <100)
        }
        else {
            if (`default') {
                loc opt max(249)
                loc sep ,
            }
            else loc sep
            loc opt `opt' miss
        }

        // parse vlists
        // each list may start with an equal sign, recorded in eq#
        loc i 0
        while ("`values'" != "") {
            loc ++i
            loc values : subinstr loc values "q" " 25 50 75 " ,all
            gettoken vlst values : values ,p(\)
            gettoken b values : values ,p(\)
            gettoken eq vlst : vlst ,p(=)
            if ("`eq'" != "=") {
                loc vlst `eq'
                loc eq`i' 0
            }
            else loc eq`i' 1
            numlist "`vlst'" ,asc `opt'
            loc vlst`i' `r(numlist)'
            loc hasmiss`i' 0
            if ("`percentile'" == "") {
                // build the plain list and a float() wrapped copy; the
                // latter is used for variables not stored as double
                loc tmp
                loc ftmp
                foreach v of loc vlst`i' {
                    if !(`hasmiss`i'') loc hasmiss`i' = mi(`v')
                    loc tmp `tmp'`sep' `v'
                    loc ftmp `ftmp'`sep' float(`v')
                }
                loc vlst`i' `tmp'
                loc fvlst`i' `ftmp'
            }
        }
        loc nvlst `i'
    }

    * final checks and settings

    // sample restriction used by tabulate, levelsof and _pctile.
    // It has to be defined before the levels are counted below;
    // otherwise the count ignores if/in and disagrees with the levels
    // for which dummies are actually created.
    if ("`exclude'" == "") loc tou "if `touse'"
    else loc tou

    // names
    loc nvars : word count `varlist'
    if ("`stub'" != "") {
        if (`: word count `stub'' != `nvars') {
            di as err "option stub: number of stubs does not " ///
                "match number of variables"
            e 198
        }
        loc cmd g byte
    }
    else {
        loc ndum 0 // overall dummies to be created
        if ("`levels'" != "") {
            foreach var of loc varlist {
                qui ta `var' `tou' ,`missing'
                // r(r) may be missing if nothing was tabulated
                loc ndum = `ndum' + cond(mi(r(r)), 0, r(r))
            }
        }
        else if ("`levels'" == "" ) {
            if (`default') loc ndum `nvlst'
            else {
                forval j = 1/`nvlst' {
                    loc nvals : word count `vlst`j''
                    loc pm 0
                    if (`nvals' > 1) {
                        loc pm = cond(`eq`j'', (-1), 1)
                    }
                    loc ndum = `ndum' + `nvals' + `pm'
                }
            }
        }
        // NOTE(review): without levels, ndum is a per-variable count
        // but is still compared with the number of variables - confirm
        loc mdum = cond(`ndum' > `nvars', 1, 0)
        if ("`replace'" != "") {
            if (`mdum') {
                di as err "option replace not allowed"
                e 198
            }
            loc cmd replace
        }
        else if ("`replace'" == "") {
            loc ngen : word count `generate'
            if !(`ngen') {
                if ("`prefix'`suffix'`levels'" == "") & !(`mdum') {
                    di as txt "(note: default prefix {hi:d_} set)"
                    loc prefix d_
                }
            }
            else if (`ngen') {
                if (`ngen' != `ndum') {
                    di as err "option generate: number of names " ///
                        "does not match number of dummies to be created"
                    e 198
                }
                foreach g of loc generate {
                    conf new v `prefix'`g'`suffix'
                }
            }
            loc cmd g byte
        }
    }

    // advanced options
    if ("`oi'" == "") loc oi o
    if ("`ro'" == "") loc ro >=

    // counters
    loc c 0 // count variables in varlist
    loc o 0 // count created dummies (overall)

    * temporary variable
    tempvar cpyvar

    * create dummies from variables
    foreach var of loc varlist {
        if ("`levels'" == "") {
            // keep a copy: with replace the source is overwritten but
            // its missing values are still needed afterwards
            cap drop `cpyvar'
            qui g `cpyvar' = `var'
        }
        loc ++c
        // f selects the float() wrapped value list unless var is double
        loc f = cond("`: t `var''" == "double", "", "f")

        // default (one)
        if (`default') {
            loc i 0 // count dummies per variable
            forval j = 1/`nvlst' {
                loc ++i
                loc ++o
                loc notmiss = cond(`hasmiss`j'', "", "& !mi(`var')")

                // get/check name
                if ("`replace'" != "") loc nam `var'
                else {
                    _gcnam `var' ,c(`c') i(`i') o(`o') ///
                        mdum(`mdum') skip(`skip') stub(`stub') ///
                        generate(`generate') ///
                        prefix(`prefix') suffix(`suffix')
                    // expands to -continue- if the name is taken
                    `s(_td_con)'
                    loc nam `s(_td_nam)'
                }

                // create dummy
                qui `cmd' `nam' = inlist(`var'``f'vlst`j'') ///
                    if `touse' `notmiss'
                if ("`cmd'" == "replace") qui compress `nam'

                // copy missing values
                if (`hasmiss`j'') {
                    if ("`missing'" != "") {
                        qui replace `nam' = `cpyvar' ///
                            if !(`nam') & mi(`cpyvar')
                    }
                }
                else {
                    if ("`missing'" != "") {
                        qui replace `nam' = `cpyvar' ///
                            if mi(`cpyvar') & `touse'
                    }
                }

                // label
                if ("`varlabel'" == "") & (`"`macval(label)'"' != "") {
                    _labd `nam' ,l(`"`macval(label)'"') oi(``oi'')
                }
            }
            continue
        }

        * not default
        if !(`default') {

            * levels
            if ("`levels'" != "") {
                loc i 0 // count dummies per variable
                qui levelsof `var' `tou' ,`missing' l(lvls)
                foreach l of loc lvls {
                    loc ++i
                    loc ++o
                    loc labok 1

                    // get/check name
                    if ("`names'" != "") loc nam `var'`i'
                    else {
                        if ("`stub'" != "") {
                            loc nam `: word `c' of `stub''`i'
                        }
                        else {
                            loc nam : word `o' of `generate'
                        }
                        if ("`nam'" == "") {
                            // fall back on the value label, made a
                            // valid name, or on variable name + index
                            loc nam : lab (`var') `l' ,strict
                            if (`"`macval(nam)'"' != "") {
                                loc nam : subinstr loc nam "`" "_" ,all
                                loc nam : subinstr loc nam " " "_" ,all
                                cap conf name `nam'
                                if _rc {
                                    if (c(stata_version) < 11) {
                                        loc nam `var'`i'
                                    }
                                    else {
                                        loc nam = ///
                                            strtoname(`"`macval(nam)'"')
                                    }
                                }
                                if (`"`macval(label)'"' == "") loc labok 0
                            }
                            else loc nam `var'`i'
                        }
                    }
                    loc nam `prefix'`nam'`suffix'
                    cap conf new v `nam'
                    if _rc {
                        if ("`skip'" != "") {
                            if ("`skip'" == "drop") qui drop `nam'
                            else loc nam : permname `nam'
                        }
                        else {
                            di as txt "(note: " as res "`nam' " ///
                                as txt "already exists)"
                            continue
                        }
                    }

                    // create dummy
                    if ("`f'" == "f") {
                        qui g byte `nam' = `var' == float(`l') ///
                            if `touse'
                    }
                    else qui g byte `nam' = `var' == `l' if `touse'

                    // copy missing values
                    if ("`missing'" == "") {
                        qui replace `nam' = `var' ///
                            if mi(`var') & `touse'
                    }

                    // label
                    if (`"`macval(varlabel)'"' == "") & (`labok') {
                        loc def : lab (`var') `l' ,strict
                        if (`"`macval(def)'"' == "") {
                            loc def "`var' (`= round(`l', .01)')"
                        }
                        _labd `nam' ,l(`"`macval(label)'"') ///
                            oi(``oi'') def(`def')
                    }
                }
                continue
            }

            * percentile and cut
            loc i 0 //count dummies per variable
            forval j = 1/`nvlst' {
                loc notmiss = cond(`hasmiss`j'', "", "& !mi(`var')")

                // parse values percentiles or cutpoints
                loc n : word count `vlst`j''
                if ("`percentile'" != "") {
                    _pctile `var' `tou' ,p(`vlst`j'')
                }
                forval k = 1/`n' {
                    if ("`percentile'" != "") loc val`k' `r(r`k')'
                    else loc val`k' : word `k' of ``f'vlst`j''
                }

                // only one value in numlist
                if (`n' == 1) {
                    loc ++i
                    loc ++o

                    // get/check name
                    if ("`replace'" != "") loc nam `var'
                    else {
                        _gcnam `var' ,c(`c') i(`i') o(`o') ///
                            mdum(`mdum') skip(`skip') stub(`stub') ///
                            generate(`generate') ///
                            prefix(`prefix') suffix(`suffix')
                        `s(_td_con)'
                        loc nam `s(_td_nam)'
                    }

                    // create dummy
                    qui `cmd' `nam' = `var' `ro' `val1' ///
                        if `touse' `notmiss'
                    if ("`cmd'" == "replace") qui compress `nam'

                    // copy missing values
                    if (`hasmiss`j'') {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if !(`nam') & mi(`cpyvar')
                        }
                    }
                    else {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if mi(`cpyvar') & `touse'
                        }
                    }

                    // label
                    if ("`varlabel'" == "") & ("`median'" == "") {
                        loc def "`var' (`ro' `=round(`val1', .01)')"
                        _labd `nam' ,l(`"`macval(label)'"') ///
                            oi(``oi'') def(`def')
                    }
                    continue
                }

                // more than one value in numlist
                if !(`eq`j'') {
                    loc ++i
                    loc ++o

                    // get/check name
                    if ("`replace'" != "") loc nam `var'
                    else {
                        _gcnam `var' ,c(`c') i(`i') o(`o') ///
                            mdum(`mdum') skip(`skip') stub(`stub') ///
                            generate(`generate') ///
                            prefix(`prefix') suffix(`suffix')
                        loc nam `s(_td_nam)'
                    }
                    // a taken name only skips this first dummy; the
                    // remaining dummies of the list are still created
                    if ("`s(_td_con)'" == "") {

                        // create first dummy if needed
                        qui g byte `nam' = `var' <= `val1' ///
                            if `touse' `notmiss'

                        // copy missing values
                        if (`hasmiss`j'') {
                            if ("`missing'" != "") {
                                qui replace `nam' = `cpyvar' ///
                                    if !(`nam') & mi(`cpyvar')
                            }
                        }
                        else {
                            if ("`missing'" != "") {
                                qui replace `nam' = `cpyvar' ///
                                    if mi(`cpyvar') & `touse'
                            }
                        }

                        // label
                        if ("`varlabel'" == "") {
                            loc def "`var' (<= `= round(`val1', .01)')"
                            _labd `nam' ,l(`"`macval(label)'"') ///
                                oi(``oi'') def(`def')
                        }
                    }
                }

                // create more dummies
                forval k = 2/`n' {
                    loc ++i
                    loc ++o

                    // get/check name
                    if ("`replace'" != "") loc nam `var'
                    else {
                        _gcnam `var' ,c(`c') i(`i') o(`o') ///
                            mdum(`mdum') skip(`skip') stub(`stub') ///
                            generate(`generate') ///
                            prefix(`prefix') suffix(`suffix')
                        `s(_td_con)'
                        loc nam `s(_td_nam)'
                    }

                    // create dummy
                    loc pre = `k' - 1
                    qui g byte `nam' = `var' > `val`pre'' ///
                        & `var' <= `val`k'' if `touse' `notmiss'

                    // copy missing values
                    if (`hasmiss`j'') {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if !(`nam') & mi(`cpyvar')
                        }
                    }
                    else {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if mi(`cpyvar') & `touse'
                        }
                    }

                    // label
                    if ("`varlabel'" == "") {
                        loc v1 = round(`val`pre'', .01)
                        loc v2 = round(`val`k'', .01)
                        loc def "`var' (`v1'-`v2')"
                        _labd `nam' ,l(`"`macval(label)'"') ///
                            oi(``oi'') def(`def')
                    }
                }

                // create last dummy in list if needed
                if !(`eq`j'') {
                    loc ++i
                    loc ++o

                    // get/check name
                    if ("`replace'" != "") loc nam `var'
                    else {
                        _gcnam `var' ,c(`c') i(`i') o(`o') ///
                            mdum(`mdum') skip(`skip') stub(`stub') ///
                            generate(`generate') ///
                            prefix(`prefix') suffix(`suffix')
                        `s(_td_con)'
                        loc nam `s(_td_nam)'
                    }

                    // create dummy
                    qui g byte `nam' = `var' > `val`n'' ///
                        if `touse' `notmiss'

                    // copy missing values
                    if (`hasmiss`j'') {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if !(`nam') & mi(`cpyvar')
                        }
                    }
                    else {
                        if ("`missing'" != "") {
                            qui replace `nam' = `cpyvar' ///
                                if mi(`cpyvar') & `touse'
                        }
                    }

                    // label
                    if ("`varlabel'" == "") {
                        loc def "`var' (> `= round(`val`n'', .01)')"
                        _labd `nam' ,l(`"`macval(label)'"') ///
                            oi(``oi'') def(`def')
                    }
                }
            }
        }
    }

    sret loc _td_nam
    sret loc _td_con
end

* _gcnam: build and check the name of the next dummy variable.
*
* The name is the c-th stub followed by i, or the o-th word of
* generate(), or the variable name (followed by i if mdum is nonzero),
* each wrapped in prefix() and suffix().
* Returns s(_td_nam), the name to use, and s(_td_con), which is set to
* "continue" if the name is taken and skip() is empty. With skip(drop)
* the existing variable is dropped; any other skip() value picks a new
* name instead.
pr _gcnam ,sclass
    syntax varname ///
    [ , ///
        c(numlist) i(numlist) o(numlist) mdum(numlist) skip(str) ///
        stub(namelist) generate(namelist) prefix(name) suffix(str) ///
    ]

    sret loc _td_nam
    sret loc _td_con

    loc var `varlist'

    * get name
    if ("`stub'" != "") {
        loc nam `: word `c' of `stub''`i'
    }
    else {
        loc nam : word `o' of `generate'
        if ("`nam'" == "") {
            if (`mdum') loc nam `var'`i'
            else loc nam `var'
        }
    }
    loc nam `prefix'`nam'`suffix'

    * check name
    cap conf new v `nam'
    if _rc {
        if ("`skip'" != "") {
            if ("`skip'" == "drop") qui drop `nam'
            else {
                loc newnam : permname `nam'
                di as txt "(note: " as res "`nam' " ///
                    as txt "already exists; creating " ///
                    as res "`newnam' " as txt "instead)"
                loc nam `newnam'
            }
        }
        else {
            di as txt "(note: " as res "`nam' " ///
                as txt "already exists)"
            sret loc _td_con "continue"
        }
    }
    sret loc _td_nam "`nam'"
end

* _labd: attach a variable label.
*
* Uses word number oi() of l() as label and falls back on def() if that
* word is empty. Compound double quotes are used if the label itself
* contains a double quote.
pr _labd
    syntax varname [,l(str) oi(numlist) def(str)]

    loc lbl : word `oi' of `l'
    if (`"`macval(lbl)'"' == "") loc lbl `def'
    if strpos(`"`macval(lbl)'"', `"""') la var `varlist' `"`lbl'"'
    else la var `varlist' "`lbl'"
end

* _oldsyntax: translate options of the old todummy syntax.
*
* Accepts at most one of percentile(), cut(), one(), each, distinct.
* Returns s(_td_vk), set to k for each and distinct and to the option
* argument otherwise, and s(_td_opt), the name of the option used
* (percentile, cut or one).
pr _oldsyntax ,sclass
    syntax [ , Percentile(numlist > 0 < 100 max = 1) ///
        Cut(numlist max = 1) One(str) Each Distinct ]

    di as txt "(note: you are using old {cmd:todummy} syntax; " ///
        "see {help todummy} for new syntax)"

    loc addo 0
    foreach opt in percentile cut one each distinct {
        loc addo = `addo' + ("``opt''" != "")
    }
    if (`addo' > 1) {
        di as err "only one of percentile, cut, each " ///
            "or distinct is allowed"
        e 198
    }
    if ("`each'" != "") | ("`distinct'" != "") sret loc _td_vk k
    else {
        foreach opt in percentile cut one {
            if ("``opt''" != "") {
                sret loc _td_vk ``opt''
                sret loc _td_opt `opt'
                continue ,br
            }
        }
    }
end
e

1.2.1   08feb2012   compatibility with version 9.2
                    code polish
1.2.0   21jul2011   code completely rewritten (version 1.1.2 still available on request)
1.1.2   21may2011   fix bug with double precision
1.1.1   na          add -binary- and -noskip- option
                    extend -one()- option (allow multiple numlists)
                    specifying more than one main option is an error
                    add some checks
                    add subroutine _err and _labd
1.1.0   na          fix problems with left single quotes
1.0.9   na          add -replace- option
                    no longer check version
                    minor changes in error messages