*! Inspired by -winsor- (NJ Cox) and -winsorizeJ- (J Caskey)
*! Lian Yujun, arlionn@163.com, 2013-12-25
*! 1.1 2014.12.16

*-----------------------------------------------------------------------------
* winsor2 -- winsorize or trim one or more variables at given percentiles.
*
* Syntax:
*   winsor2 varlist [if] [in] [, suffix(str) replace trim
*                                cuts(# #) by(varlist) label]
*
* Options:
*   suffix(str)  suffix for the generated variables (default _w, or _tr with
*                -trim-); cannot be combined with -replace-.
*   replace      overwrite the original variables instead of generating new
*                ones.
*   trim         set values outside the cut points to missing instead of
*                clamping them to the cut points.
*   cuts(# #)    lower and upper percentiles, each in [0,100]; default is
*                cuts(1 99). The two numbers may be given in either order.
*                0 means "use the minimum", 100 means "use the maximum".
*   by(varlist)  compute the cut points separately within groups.
*   label        append "-Winsor(pL,pH)" / "-Trim(pL,pH)" to the variable
*                label of each result.
*
* Only observations selected by [if] [in] with a nonmissing source value are
* modified (or filled in, when new variables are generated).
*
* Exit codes: 198 if suffix() and replace are combined; 111 if a target
* variable already exists or the by() list is invalid. cuts(0 100) prints a
* message and exits without doing anything.
*-----------------------------------------------------------------------------
cap program drop winsor2
program def winsor2, sortpreserve
    version 8
    syntax varlist(min=1) [if] [in] /*
        */ [, Suffix(str) REPLACE Trim Cuts(numlist max=2 min=2 >=0 <=100) by(varlist) Label]

    if "`replace'" != "" & "`suffix'" != "" {
        dis in w "suffix() " in red "cannot be specified with" in w " replace"
        exit 198
    }

    * Mode-dependent texts: default suffix, word used in the warning, label tag
    if "`trim'" == "" {
        local defsuffix "_w"
        local CUT "winsor"
        local tag "Winsor"
    }
    else {
        local defsuffix "_tr"
        local CUT "trim"
        local tag "Trim"
    }
    if "`suffix'" == "" {
        local suffix "`defsuffix'"
    }

    * Cut points
    if "`cuts'" == "" {
        local low 1
        local high 99
    }
    else {
        tokenize "`cuts'"
        local low `1'
        local high `2'
        * Accept the cut points in either order
        if `low' > `high' {
            local low `2'
            local high `1'
        }
        if `low'>0&`low'<1|`high'>0&`high'<1 {
            dis in y "Warning: " in g "cuts(1 99) means `CUT' at 1th percentile and 99th percentile,"
            dis in g " " in g "cuts(0.1 90) means `CUT' at 0.1th percentile and 90th percentile,"
            dis in g " " in g "make sure cuts(`low' `high') you specified is what you want. "
        }
        if `low'==0&`high'==100 {
            dis in red "option cuts(`cuts') is incorrect, no action taken."
            exit
        }
    }

    * Display form of the lower percentile for variable labels: add a leading
    * zero to values such as ".5". Kept separate from `low' so the numeric
    * value used for the computation is never altered inside the loop.
    local lablow "`low'"
    if substr("`low'", 1, 1) == "." {
        local lablow "0`low'"
    }

    * Validate suffix: none of the variables to be generated may exist yet
    if "`replace'" == "" {
        foreach k of varlist `varlist' {
            capture confirm variable `k'`suffix', exact
            if _rc == 0 {
                di as error "variable `k'`suffix' already exist, re-specify option -suffix()-"
                di as error "Suffix `suffix' is invalid for `k'"
                exit 111
            }
        }
    }

    * Validate by list
    if "`by'" != "" {
        capture confirm variable `by'
        if _rc != 0 {
            di as error "by() list is invalid"
            exit 111
        }
    }

    * Winsorize or trim, one variable at a time
    foreach k of varlist `varlist' {

        * Sample for this variable: the user's -if- plus nonmissing `k'
        if "`if'" == "" {
            local where "if ~missing(`k')"
        }
        else {
            local where "`if' & ~missing(`k')"
        }

        local vtype : type `k'

        * Variable that receives the result
        if "`replace'" != "" {
            local target `k'
        }
        else {
            local target `k'`suffix'
        }

        if "`by'" == "" {
            * ---- no by(): scalar cut points --------------------------------
            if `low' != 0 & `high' != 100 {
                qui _pctile `k' `if' `in', p(`low' `high')
                local qleft  = r(r1)
                local qright = r(r2)
            }
            else if `low' == 0 {
                * lower bound is the sample minimum
                qui _pctile `k' `if' `in', p(`high')
                local qright = r(r1)
                qui sum `k' `if' `in', meanonly
                local qleft = r(min)
            }
            else {
                * high == 100: upper bound is the sample maximum
                qui _pctile `k' `if' `in', p(`low')
                local qleft = r(r1)
                qui sum `k' `if' `in', meanonly
                local qright = r(max)
            }

            if "`trim'" == "" {
                * winsorize: clamp to [qleft, qright]
                if "`replace'" != "" {
                    qui replace `k' = max(min(`qright',`k'),`qleft') `where' `in'
                }
                else {
                    qui gen `vtype' `target' = max(min(`qright',`k'),`qleft') `where' `in'
                }
            }
            else {
                * trim: values outside [qleft, qright] become missing
                if "`replace'" != "" {
                    qui replace `k' = cond(`k'<`qleft',.,`k') `where' `in'
                    qui replace `k' = cond(`k'>`qright',.,`k') `where' `in'
                }
                else {
                    qui gen `vtype' `target' = cond(`k'<`qleft',.,`k') `where' `in'
                    * test the source, but keep what the first pass already trimmed
                    qui replace `target' = cond(`k'>`qright',.,`target') `where' `in'
                }
            }
        }
        else {
            * ---- with by(): group-specific cut points held in tempvars ------
            tempvar pL pH

            if `low' == 0 {
                qui egen `vtype' `pL' = min(`k') `where' `in', by(`by')
            }
            else {
                qui egen `vtype' `pL' = pctile(`k') `where' `in', p(`low') by(`by')
            }
            if `high' == 100 {
                qui egen `vtype' `pH' = max(`k') `where' `in', by(`by')
            }
            else {
                qui egen `vtype' `pH' = pctile(`k') `where' `in', p(`high') by(`by')
            }

            if "`trim'" == "" {
                * winsorize: min()/max() skip missing arguments, like the
                * egen rowmin()/rowmax() pair they replace
                if "`replace'" != "" {
                    * restricted to the sample so observations excluded by
                    * if/in keep their original values
                    qui replace `k' = min(`pH', max(`pL', `k')) `where' `in'
                }
                else {
                    * created with the source's storage type, as in all
                    * other branches
                    qui gen `vtype' `target' = min(`pH', max(`pL', `k')) `where' `in'
                }
            }
            else {
                * trim
                if "`replace'" != "" {
                    qui replace `k' = cond(`k'<`pL',.,`k') `where' `in'
                    qui replace `k' = cond(`k'>`pH',.,`k') `where' `in'
                }
                else {
                    qui gen `vtype' `target' = cond(`k'<`pL',.,`k') `where' `in'
                    * test the source, but keep what the first pass already trimmed
                    qui replace `target' = cond(`k'>`pH',.,`target') `where' `in'
                }
            }

            qui drop `pL' `pH'
        }

        * Label the result: source label (or the variable name when it has
        * none), plus the operation tag when -label- was requested
        local labk : variable label `k'
        if `"`labk'"' == "" {
            local labk "`k'"
        }
        if "`label'" != "" {
            label var `target' `"`labk'-`tag'(p`lablow',p`high')"'
        }
        else {
            label var `target' `"`labk'"'
        }
    }
end