*! version 1.0 08feb2017 Cong Ye capture program drop rsz program define rsz version 12.0 syntax [if] [in], strsamsize(name) replicates(string) ranseed(string) [alt(string) strata(varlist) sorting(varlist) consorting(varlist) serp(string) mos(name) exprr(string) inflatedsamsize(string) desiredinitial(string) extra(string) restore(string) ] di " " qui { foreach v in _rszorder _rszpartial _stratagroup _dupstr _strtotal _rszstrsize _rszcomplete _originaltotal _rsztotal _perzone _maxvals _minvals /// _tempran _dupzone _dupgroup _zonetotal _runtotal _grouptotal _temp { tempvar `v' } foreach v in _rszgroup _rszzone _rszreplicate _rszweight _srsweight _sysweight _98rsz* { capture drop `v' if _rc==0 noi { di as text "{it:Warning: variable `v' in the input data was overwritten. Rename it to avoid being overwritten.}" } } set more off local alt=lower("`alt'") local serp=upper("`serp'") local inflatedsamsize=upper("`inflatedsamsize'") local restore=upper("`restore'") gen `_rszorder'=_n gen `_rszpartial'=1 `if' `in' replace `_rszpartial'=. if `strsamsize'==. gen `_tempran'=uniform() if `_rszpartial'==1 egen `_stratagroup'=group(`strata') if `_rszpartial'==1 sort `_stratagroup' `sorting' `consorting' `_tempran' by `_stratagroup': gen `_dupstr'=cond(_N==1,1,_n) if `_rszpartial'==1 by `_stratagroup': egen `_strtotal'=max(`_dupstr') if `_rszpartial'==1 if "`inflatedsamsize'"=="Y" & "`exprr'"=="" noi { di as error "Error: exprr must be supplied if inflatedsamsize is 'Y'." sort `_rszorder' exit } if "`exprr'"=="" { local exprr=1 } if "`inflatedsamsize'"=="Y" { gen `_rszstrsize'=`strsamsize' if `_rszpartial'==1 gen `_rszcomplete'=round(`strsamsize'*(1-(1-`exprr')^`replicates'),1) if `_rszpartial'==1 } else { gen `_rszstrsize'=round(`strsamsize'/(1-(1-`exprr')^`replicates'),1) if `_rszpartial'==1 gen `_rszcomplete'=`strsamsize' if `_rszpartial'==1 } set seed `ranseed' if "`alt'"~="srs" & "`alt'"~="sys" { egen `_rsztotal'=sum(`_rszstrsize') if `_rszpartial'==1 & `_dupstr'==1 egen `_originaltotal'=sum(`strsamsize') if `_rszpartial'==1 & `_dupstr'==1 if "`desiredinitial'"~="" { set seed `ranseed' local iter=1 while "`desiredinitial'"~="`actualinitial'" & `iter'<100 { local `_temp'=uniform()-0.5 sum `_originaltotal' local originaltotal=r(mean) replace `_rszstrsize'=round(`strsamsize'*`desiredinitial'/`originaltotal'+``_temp'',1) if `_rszpartial'==1 capture drop `_rsztotal' egen `_rsztotal'=sum(`_rszstrsize') if `_rszpartial'==1 & `_dupstr'==1 sum `_rsztotal' local actualinitial=r(mean) sum `_rszcomplete' if `_rszpartial'==1 & `_dupstr'==1 local rszcomplete=r(sum) local iter=`iter'+1 } if `iter'>=100 noi { dis as text "{it:Warning: actual initial total does not equal to desired initial total.}" } } else { sum `_originaltotal' local originaltotal=r(mean) sum `_rsztotal' local actualinitial=r(mean) sum `_rszcomplete' if `_rszpartial'==1 & `_dupstr'==1 local rszcomplete=r(sum) } sort `_stratagroup' `sorting' `consorting' `_tempran' gen _rszgroup=ceil(`_dupstr'/`_strtotal'*`_rszstrsize'-0.0000000001) gen `_perzone'=round(2*`_rszstrsize'/`_rszcomplete',1) gen _rszzone=ceil(_rszgroup/`_perzone') sort `_stratagroup' _rszzone `_tempran' by `_stratagroup' _rszzone: egen `_maxvals'=max(_rszgroup) if `_rszpartial'==1 by `_stratagroup' _rszzone: egen `_minvals'=min(_rszgroup) if `_rszpartial'==1 replace _rszzone=_rszzone-1 if `_maxvals'-`_minvals'+1<=`_perzone'/2 & `_rszpartial'==1 drop `_maxvals' `_minvals' `_originaltotal' `_rsztotal' `_dupstr' `_strtotal' `_rszcomplete' sort `_stratagroup' _rszzone `_tempran' by `_stratagroup' _rszzone: egen `_maxvals'=max(_rszgroup) if `_rszpartial'==1 by `_stratagroup' _rszzone: egen `_minvals'=min(_rszgroup) if `_rszpartial'==1 replace `_perzone'=`_maxvals'-`_minvals'+1 if `_rszpartial'==1 sort `_stratagroup' _rszzone `_tempran' by `_stratagroup' _rszzone: gen `_dupzone'=cond(_N==1,1,_n) if `_rszpartial'==1 by `_stratagroup' _rszzone: egen `_zonetotal'=count(`_rszpartial') if `_rszpartial'==1 replace _rszgroup=ceil(`_dupzone'/`_zonetotal'*`_perzone'-0.0000000001) drop `_perzone' `_dupzone' `_zonetotal' } if "`extra'"~="" { local totreplicates=`replicates'+`extra' } else { local totreplicates=`replicates' } if "`alt'"=="srs" { local _zonegroup_="" replace `_rszstrsize'=ceil(`strsamsize'*`totreplicates'/`exprr') if `_rszpartial'==1 } if "`alt'"~="srs" { if "`serp'"=="N" { local _strsort_="`sorting' `consorting'" } else if "`sorting'"~="" { foreach v of varlist `sorting' { tab `v' if `_rszpartial'==1 if r(r)>100 noi { di as text "{it:Warning: sorting variable {bf:`v'} has too many categories. You can define it in the option {it:consorting} instead.}" } tab `v' if `_rszpartial'==1,gen(_98rsz`v') } gen `_temp'=0 foreach v of varlist _98rsz* { replace `v'=`v'*(-1) if `_temp'==0 & `_rszpartial'==1 replace `_temp'=`v' } local _strsort_="_98rsz* `consorting'" } else if "`consorting'"~="" { local _strsort_="`consorting'" } if "`alt'"=="sys" { local _zonegroup_="" replace `_rszstrsize'=ceil(`strsamsize'*`totreplicates'/`exprr') if `_rszpartial'==1 } else { local _zonegroup_="_rszzone _rszgroup" } } if "`mos'"=="" { local mos=1 } set seed `ranseed' replace `_tempran'=uniform() if `_rszpartial'==1 sort `_stratagroup' `_zonegroup_' `_strsort_' `_tempran' capture drop `_minvals' capture drop `_dupstr' by `_stratagroup' `_zonegroup_': gen `_minvals'=_n== 1 if `_rszpartial'==1 by `_stratagroup' `_zonegroup_': gen `_runtotal'=sum(`mos') if `_rszpartial'==1 by `_stratagroup' `_zonegroup_': egen `_grouptotal'=max(`_runtotal') if `_rszpartial'==1 by `_stratagroup' `_zonegroup_': gen `_dupstr'=_n if `_rszpartial'==1 if "`alt'"=="srs" & "`mos'"=="1" { gen _rszweight=`_grouptotal'/`mos'/`_rszstrsize' if `_rszstrsize'~=. & `_dupstr'<=`_rszstrsize' & `_rszpartial'==1 drop `_dupstr' `_strtotal' `_rszcomplete' } if "`alt'"=="sys" | ("`alt'"=="srs" & "`mos'"~="1") { drop `_dupstr' `_strtotal' `_rszcomplete' gen _rszweight=. capture drop `_temp' egen `_temp'=group(`_stratagroup' `_zonegroup_') if `_rszpartial'==1 sum `_temp' local _strnum_=r(max) forvalues _m_=1(1)`_strnum_' { sum `_rszstrsize' if `_rszpartial'==1 & `_temp'==`_m_' local _strmax_=r(max) local temp=uniform() forvalues k=1(1)`_strmax_' { replace _rszweight=1 if (((`_grouptotal'*`temp'/`_rszstrsize'+`_grouptotal'*(`k'-1)/`_rszstrsize')<=`_runtotal' & `_minvals'==1) /// | ((`_grouptotal'*`temp'/`_rszstrsize'+`_grouptotal'*(`k'-1)/`_rszstrsize')<=`_runtotal' /// & (`_grouptotal'*`temp'/`_rszstrsize'+`_grouptotal'*(`k'-1)/`_rszstrsize')>`_runtotal'[_n-1] & `_minvals'~=1)) & `_rszpartial'==1 & `_temp'==`_m_' } } replace _rszweight=`_grouptotal'/`mos'/`_rszstrsize' if _rszweight~=. } if "`alt'"~="srs" & "`alt'"~="sys" { gen _rszreplicate=. capture drop `_temp' egen `_temp'=group(`_stratagroup' `_zonegroup_') if `_rszpartial'==1 sum `_temp' local _strnum_=r(max) forvalues _m_=1(1)`_strnum_' { local i=1 local temp=uniform() replace _rszreplicate=`i' if ((`_grouptotal'*`temp'<=`_runtotal' & `_minvals'==1) /// | (`_grouptotal'*`temp'<=`_runtotal' & `_grouptotal'*`temp'>`_runtotal'[_n-1] & `_minvals'~=1)) & `_rszpartial'==1 & `_temp'==`_m_' local iter=1 local min=0 while `replicates'>1 & `min'<`totreplicates' & `iter'<1000 { local i=`i'+1 local temp=uniform() replace _rszreplicate=`i' if _rszreplicate==. & ((`_grouptotal'*`temp'<=`_runtotal' & `_minvals'==1) /// | (`_grouptotal'*`temp'<=`_runtotal' & `_grouptotal'*`temp'>`_runtotal'[_n-1] & `_minvals'~=1)) & `_rszpartial'==1 & `_temp'==`_m_' drop `_maxvals' by `_stratagroup' `_zonegroup_': egen `_maxvals'=count(_rszreplicate) if `_rszpartial'==1 & `_temp'==`_m_' sum `_maxvals' local min=r(min) local iter=`iter'+1 } if `iter'>=1000 noi { dis as text "{it:Warning: some random groups do not have the desired number of replicates.}" } } sort `_stratagroup' `_zonegroup_' _rszreplicate by `_stratagroup' `_zonegroup_': gen `_dupgroup'=cond(_N==1,1,_n) if `_rszpartial'==1 & _rszreplicate~=. replace `_dupgroup'=. if `_dupgroup'>`totreplicates' & `_dupgroup'~=. replace _rszreplicate=`_dupgroup' gen _rszweight=`_grouptotal'/`mos' if _rszreplicate~=. } if "`restore'"~="N" { sort `_rszorder' } foreach v in _98rsz* { capture drop `v' } if "`alt'"=="srs" { rename _rszweight _srsweight local _wt_="srs" } if "`alt'"=="sys" { rename _rszweight _sysweight local _wt_="sys" } if "`alt'"~="srs" & "`alt'"~="sys" { local _wt_="rsz" } sum _`_wt_'weight if r(min)<1 { di as error "Error: some cases have selection probability lower than 1. They should be selected with" di as error _column(5) "certainty and excluded from the input data. Check _`_wt_'weight for details." exit } } di " " di as text _dup(80) "*" di as text "{it:Sampling results: }" if "`alt'"~="srs" & "`alt'"~="sys" { di as text _column(5) "{it:Cases in each group {bf:_rszgroup} of each zone {bf:_rszzone} in each of your stratum}" di as text _column(9) "{it:should be released in sequence as indicated in {bf:_rszreplicate}.}" di as text _column(5) "{it:Sampling Weight {bf:_`_wt_'weight} was assigned to each sampled case.}" di as text _column(5) "{it:Sample size in the initial replicate: }" as result `actualinitial' di as text _column(5) "{it:Sample size in expected completes: }" as result `rszcomplete' di as text _column(5) "{it:Sample size as specified: }" as result `originaltotal' } if "`alt'"=="srs" | "`alt'"=="sys" { di as text _column(5) "{it:Sampling Weight {bf:_`_wt_'weight} was assigned to each sampled case.}" di as text _column(5) "{it:Sample size drawn: }" as result r(N) } di as text _column(5) "{it:Number of replicates as specified: }" as result `totreplicates' di as text _column(5) "{it:Random seed as specified: }" as result `ranseed' if "`strata'"~="" { di as text _column(5) "{it:Stratifying variable(s) as specified: }" as result "`strata'" } if "`sorting'"~="" | "`consorting'"~="" { di as text _column(5) "{it:Sorting variable(s) as specified: }" as result "`sorting' `consorting'" } if "`mos'"~="1" { di as text _column(5) "{it:Measure of size as specified: }" as result "`mos'" } di " " di as text _column(5) "{it:Summary of the weight variable {bf:_`_wt_'weight}:}" sum _`_wt_'weight di " " di as text _dup(80) "*" end