*! Date : 1 Aug 2005 *! Version : 1.64 *! Authors : Adrian Mander/David Clayton *! Email : adrian.mander@mrc-hnr.cam.ac.uk *! Description : Hotdeck imputation program define hotdeck6 version 6.0 syntax [varlist] [if] [in] [using/], [BY(varlist) IMPute(integer 1) STORE GENerate(string) COMmand(string) PARMS(string) REPlace NOISE KEEP(varlist) SEED(string) QUIET INFILES(string) ] tokenize "`varlist'" local z "`1'" preserve if "`if'"~="" { qui keep `if' } /* Check the seed option */ if "`seed'"=="1" { local seed 2} if "`seed'"=="" { local seed 1 } confirm number `seed' /* To generate a seed from the time */ if `seed'==1 { local time = "$S_TIME" local date = "$S_DATE" tokenize "`time'", parse(":") local seed1 "`1'`3'`5'" tokenize "`date'", parse(" ") local dat "`1'`2'`3'" local dat1 = date("`dat'","dmy") local seed1 "`seed1'`dat1'" di local l_seed "2^31-1" local seed1 = mod(`seed1',`l_seed') di in green "Seed is `seed1'" set seed `seed1' } estimates clear tempfile olddata /*NOT SURE if I must implement no strings in BY() option tokenize "`by'" while "`1'"~="" { confirm numeric variable `1' mac shift 1 } */ tempvar touse mark `touse' `if' `in' markout `touse' `by', strok di in green "DELETING all matrices...." mat drop _all if "`infiles'"=="" { if "`by'"=="" { _misspat `varlist' `if' `in' } else { _misspat `varlist' `if' `in', by(`by') } local nfill=r(nmiss) } qui save "`olddata'" if "`infiles'"=="" { global allpat = r(allpat) count if `touse' local miss = (r(N)-`nfill')/r(N) } if "`command'"=="" { di in red "WARNING: When the option is not selected " di in red "then no analysis is performed on the imputed datasets" di if "`store'"=="" { di "ALSO STORE isnt selected so hotdeck will appear to do nothing" exit(198) } } if `impute'<1 { di in red "The number of imputations must be more than 0 not `impute'" exit(198) } if `impute'==1 & "`infiles'"=="" { if "`store'"=="" | "`command'"~="" { di in red "If one imputation is made then command option should NOT be used" di in red "AND the store option must be specified" exit(198) } } if "`using'"~="" { if "`store'"=="" { di in red "To save datasets you must specify the STORE option" exit(198) } } if "`keep'"~="" { if "`store'"=="" { di in red "If you use the KEEP option you must specify the STORE option" exit(198) } } if "`noise'"~="" & "`command'"=="" { di in red "When specifying noise you must also specify the command option" exit(198) } if "`command'"~="" { if "`parms'"=="" { di in red "To obtain any output from the command option you must also specify " di in red "the parameters of interest using the parms() option" exit(198) } } /************************************************ * Loop over the number of imputed data sets * required ************************************************/ if "`seed'"~="1" {set seed `seed'} /* This is the if statement that allows the input of imputed datafiles */ if "`infiles'"~="" { local i 1 tokenize "`infiles'" while "`1'"~="" { use "`1'",replace mac shift 1 if "`command'"=="" { di in red "You must use the command option when using INFILES" exit(198) } if "`noise'"~="" { `command' } else { qui `command' } /* Do the analysis */ _parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/ local i =`i'+1 } local impute=`i'-1 } else { local i 1 while `i'<= `impute' { use "`olddata'",clear /* Use original dataset */ qui keep if `touse' if "`by'"~="" { _hotdeck `varlist', by(`by') } else { _hotdeck `varlist' } /* Impute values */ if "`store'"~="" { /* Save imputed datasets */ if "`using'"=="" { local using "imp" } if "`keep'"=="" { qui keep `varlist' `by' qui save `using'`i',replace } else { mkvlist `varlist' `by', vlist(`keep') qui keep `r(vlist)' qui save `using'`i',replace } } if "`command'"~="" { if "`noise'"~="" { `command' } else { qui `command' } /* Do the analysis */ _parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/ } local i=`i'+1 } } if "`command'"~="" { /******************************************************** * Loop to calculate the estimates needed * * First get the dimensions of the parameter matrices ********************************************************/ local dim= rowsof(impV1) mat Qbar = J(1,`dim',0) mat Ubar = J(`dim',`dim',0) /* calc the averaging factor */ local inv = 1/`impute' /* calc the average coef and variance qbar and ubar */ local i 1 while `i'<= `impute' { mat Qbar= `inv'*impb`i'+ Qbar mat Ubar= `inv'*impV`i'+ Ubar local i=`i'+1 } /* calc between variances */ mat B=J(`dim',`dim',0) local inv1 = 1/(`impute'-1) local i 1 while `i'<= `impute' { mat B= B + `inv1'*(impb`i' - Qbar)'*(impb`i' - Qbar) local i=`i'+1 } /* Calc total variance */ mat T = Ubar+(1+1/`impute')*B cap mat tempmt=B*inv(Ubar) if _rc==504 { di as error "WARNING: Trying to invert variance matrix with zero elements?" local ter = rowsof(Ubar) mat temp = J(`ter',1,1) mat temp2 = Ubar*temp local tei 1 local names: colfullnames(impb1) matrix rownames temp2 = `names' while `tei'<=`ter' { if temp2[`tei',1]==0 { local var:word `tei' of "`names'" di as txt "Variance for covariate `tei' is 0 !!" } local tei=`tei'+1 } mat tempmt=B*inv(Ubar) } local trace=trace(tempmt) local r1 = 1-((1+1/`impute')*`trace'/`dim') local names: rowfullnames(impb1) matrix rownames Qbar = `names' local names: colfullnames(impb1) matrix colnames Qbar = `names' local names: rowfullnames(impV1) matrix rownames T = `names' matrix rownames B = `names' matrix rownames Ubar = `names' local names: colfullnames(impV1) matrix colnames T = `names' matrix colnames B = `names' matrix colnames Ubar = `names' mat Tsurr= `r1'*T mat D = Qbar*inv(Tsurr)*Qbar' local D1 = D[1,1]/`dim' local t=`dim'*(`impute'-1) local v1= 4+(`t'-4)*(1+(1-2/`t')*1/`r1')^2 local ftest= fprob(`dim',`v1',`D1') /******************************************************** * The next will output the main results in Stata style * if the normal approximation is good then you could * use the matrix post command ********************************************************/ if "`quiet'"=="" { if `r1'<0 { di in red "WARNING: between se larger than within se in one or more " di in red "parameters invalidating the global F test" } if `t'<4 { di in red "WARNING: t less than 4 invalid global test increase " di in red "parameters OR imputations" } } di di in gr _col(1) "Number of Obs.", _col(45) " = ", _N di in gr _col(1) "No. of Imputations", _col(48) "= ", `impute' if "`infiles'"=="" { di in gr _col(1) "% Lines of Missing Data", _col(45) " = ", `miss'*100,"%" } di in gr _col(1) "F(",%6.3f `v1',",`dim')", _col(45) " = ", %9.4f `D1' di in gr _col(1) "Prob > F " , _col(45) " = ", %9.4f `ftest' di in gr _dup(83) "-" local names: colfullnames(impb1) local name : word 1 of `names' local i 1 di in gr _continue "Variable |",_col(12) "Average",_col(21) "Between", _col(30) "Within", _col(40) "Total", _col(50) "df", _col(59) "t", _col(65) "p-value" di di in gr _continue" |",_col(12) "Coef.",_col(21) "Imp. SE", _col(30) "Imp. SE", _col(39) " SE", _col(50) "", _col(59) "", _col(65) "" di di in gr _continue "-------------+---------------------------------------------------------------------" while "`name'"~="" { di mat qhat=Qbar[1,"`name'"] mat b=B["`name'","`name'"] mat u=Ubar["`name'","`name'"] mat t=T["`name'","`name'"] local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2 local ttest= qhat[1,1]/sqrt(t[1,1]) di _continue "`name'",_col(9) "|", _col(10) %7.4f qhat[1,1],_col(17) %9.3f sqrt(b[1,1]), _col(25) %9.3f sqrt(u[1,1]), _col(34) %9.3f sqrt(t[1,1]), _col(44) %9.1f `df', _col(53) %9.3f `ttest', _col(62) %9.3f tprob(`df',`ttest') local i=`i'+1 local name : word `i' of `names' } di di in gr _continue "-------------+---------------------------------------------------------------------" di local name : word 1 of `names' local i 1 di in gr _continue "Variable |",_col(12) "[$S_level% Conf. Interval]" di di in gr _continue "-------------+---------------------------------------------------------------------" while "`name'"~="" { di mat qhat=Qbar[1,"`name'"] mat b=B["`name'","`name'"] mat u=Ubar["`name'","`name'"] mat t=T["`name'","`name'"] local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2 local ttest= qhat[1,1]/sqrt(t[1,1]) local prob = 1-((100-$S_level)/2)/100 local tvalue = invt(`df',`prob') local left = qhat[1,1]-`tvalue'*sqrt(t[1,1]) local right = qhat[1,1]+`tvalue'*sqrt(t[1,1]) di _continue "`name'",_col(9) "|", %9.4f `left', %9.4f `right' local i=`i'+1 local name : word `i' of `names' } di "" di in gr _dup(83) "-" } /* end of command if statement */ restore end /**************************************************** * The approximate Bayesian Bootstrap hotdecking ****************************************************/ program define _hotdeck version 6.0 syntax [varlist] [using], [BY(string)] tokenize "`varlist'" local z "ipattern" if "`by'"!="" { confirm ex var `by' } tempvar nobs bstrp b2strp temp temp2 qui { local nold = _N local nnew = _N sort `by' `z' `varlist' gen long `nobs' = (`z'!=.) if "`by'"=="" { replace `nobs' = sum(`nobs') replace `nobs' = `nobs'[_N] gen long `bstrp' = int(uniform()*`nobs'+1) gen long `b2strp' = int(uniform()*`nobs'+1) gen long `temp' = `bstrp'[`b2strp'] replace `bstrp' = `temp' replace `bstrp' = _n if _n<=`nobs' tokenize "`varlist'" while "`1'"~="" { gen `temp2' = `1'[`bstrp'] replace `1' = `temp2' drop `temp2' mac shift 1 } } else { by `by': replace `nobs' = sum(`nobs') by `by': replace `nobs' = `nobs'[_N] by `by': gen long `bstrp' = int(uniform()*`nobs'+1) by `by': gen long `b2strp' = int(uniform()*`nobs'+1) by `by': gen long `temp' = `bstrp'[`b2strp'] by `by': replace `bstrp' = `temp' by `by': replace `bstrp' = _n if _n<=`nobs' tokenize "`varlist'" while "`1'"~="" { by `by': gen `temp2' = `1'[`bstrp'] by `by': replace `1' = `temp2' mac shift 1 drop `temp2' } } } end /******************************************************************* * Get the parameters or a subset of them from the * model and the subset * the covariance variance matrix as well * Note that this section can also handle non-regression commands * and macro lists *******************************************************************/ program define _parms syntax [varlist], [PARMS(string) ITER(integer 1) COMMAND(string) GENerate(string) REPlace] local 0 "`parms'" while "`parms'"~="" { gettoken 0 parms: parms , parse(" ,") cap syntax [varlist] if _rc~=0 { if "`0'"=="_cons" { local vlist "`vlist' `0'" } else { local plist "`plist' `0'" } } else { local vlist "`vlist' `varlist'" } } * if results were not part of a regression command if "`e(cmd)'"=="" { local names = "" if `iter'==1 { di in red "Using Non Regression Parameters and Command" } tokenize "`plist' `vlist'" local np=0 while "`1'"~="" { local names1 ="`names1' `1'" local names2 ="`names2' `2'" if "`2'"=="" { di in red "Must supply variance estimate of `1'" exit(302) } mac shift 2 local np = `np'+1 } mat impb`iter' = J(1,`np',0) mat impV`iter' = J(`np',`np',0) tokenize "`plist' `vlist'" local np 1 while "`1'"~="" { if "$`1'"=="" & "``1''"=="" { di in red "Global = $`1' Local = ``1''" di in red "Global/local macro `1' is missing " exit(198) } if "``1''"~="" { mat impb`iter'[1,`np'] = ``1'' } if "$`1'"~="" & "``1''"=="" { mat impb`iter'[1,`np'] = $`1' } if "$`2'"=="" & "``2''"=="" { di in red "Global = $`2' Local = ``2''" di in red "Global/local macro `2' is missing " exit(198) } if "``2''"~="" { mat impV`iter'[`np',`np'] = ``2'' } if "$`2'"~="" & "``2''"=="" { mat impV`iter'[`np',`np'] = $`2' } local np=`np'+1 mac shift 2 } matrix colnames impb`iter'=`names1' matrix colnames impV`iter'=`names1' matrix rownames impV`iter'=`names1' } else { matrix myb = get(_b) matrix myV = get(VCE) tokenize "`vlist' `plist'" if "`2'"~="" { cap mat impb`iter' = myb[.,"`1'"] if _rc==111 { di as error "Are you sure `1' is in the model??" di as error "Check the matrix of estimates and only include column names in the parameters NOT variable names" exit(111) } mac shift 1 while "`1'"~="" { mat temp=myb[.,"`1'"] mat impb`iter'= impb`iter' , temp mac shift 1 } tokenize "`vlist' `plist'", parse(" ") mat impVt`iter' = myV[.,"`1'"] mac shift 1 while "`1'"~="" { mat temp=myV[.,"`1'"] mat impVt`iter'= impVt`iter' , temp mac shift 1 } tokenize "`vlist' `plist'", parse(" ") mat impV`iter' = impVt`iter'["`1'",.] mac shift 1 while "`1'"~="" { mat temp=impVt`iter'["`1'",.] mat impV`iter'= impV`iter' \ temp mac shift 1 } mat drop myb mat drop myV mat drop impVt`iter' mat drop temp } else { cap mat impb`iter' = myb[.,"`1'"] if _rc==111 { di as error "Are you sure `1' is in the model??" exit(111) } mat impVt`iter'=myV[.,"`1'"] mat impV`iter' = impVt`iter'["`1'",.] } } end /************************************************* * Look at the missing pattern in the varlist *************************************************/ program define _misspat,rclass syntax varlist [if] [in] , [BY(string) ] tokenize "`varlist'" tempvar touse2 mark `touse2' `if' `in' markout `touse2' qui gen str40 pattern="" local allstr "" while "`1'"~="" { qui replace pattern = cond(`1'==.,pattern+"*",pattern+"-") if `touse2' local allstr="-`allstr'" mac shift 1 } qui compress pattern sort pattern lab var pattern "Missing pattern" di di in green "Missing Patterns" di in green "----------------" di di in green "Table of the Missing data patterns " di in green " * signifies missing and - is not missing" di di "Varlist order: `varlist'" tab pattern if `touse2' local n=r(N) qui count if pattern=="`allstr'" & `touse2' if r(N)==`n' { di "There is no missing data in the varlist" exit(198) } return scalar nmiss = `n'-r(N) return local allpat = "`allstr'" qui gen ipattern=cond(pattern=="`allstr'",1,.) if `touse2' /***************************************** * Calculate stratum missing numbers *****************************************/ if "`by'"~="" { di di in green "STRATUM information" di in green "-------------------" di di in green "Listing the number observed (No_obs) and " di in green "the number missing (No_miss) in each stratum" tempvar cnt mcnt qui sort `by' qui by `by':gen `cnt'=sum(ipattern) qui by `by':gen `mcnt'=sum(ipattern==.) qui by `by': replace `cnt'=cond( _n==_N,`cnt',.) qui by `by': replace `mcnt'=cond( _n==_N,`mcnt',.) rename `cnt' No_obs rename `mcnt' No_miss l `by' No_obs No_miss if No_obs~=., noobs di qui count if No_obs==0 if `r(N)'>0 { di in red "WARNING: `r(N)' strata with NO observed data" } qui count if No_obs==1 if `r(N)'>0 { di in blue "`r(N)' strata with only 1 observed data"} qui count if (No_obs>1 & No_obs<6) if `r(N)'>0 { di in blue "`r(N)' strata with 2-5 observed data"} di } end /************************************************* * Expand stata syntax *************************************************/ program define mkvlist, rclass syntax varlist, VLIST(string) local o_vlist "`varlist'" local keep "`vlist'" local 0 "`keep'" while "`keep'"~="" { gettoken 0 keep: keep , parse(" ,") cap syntax [varlist] if _rc~=0 { if "`0'"=="_cons" { local vlist "`vlist' `0'" } else { local plist "`plist' `0'" } } else { local vlist "`vlist' `varlist'" } } return local vlist "`o_vlist' `vlist'" end