*! version 1.7.1 PR/IRW/TPM 17jan2011. // For history, see end of this file program define uvis, rclass sortpreserve version 9.2 if `"`0'"'=="" { di as err "command required" exit 198 } gettoken cmd 0 : 0 if substr("`cmd'",1,3)=="reg" local cmd regress local normal=("`cmd'"=="regress")|("`cmd'"=="rreg")|("`cmd'"=="intreg") local binary=("`cmd'"=="logit")|("`cmd'"=="penlogit")|("`cmd'"=="auglogit")|("`cmd'"=="logistic") local catcmd=("`cmd'"=="mlogit")|("`cmd'"=="augmlogit")|("`cmd'"=="ologit")|("`cmd'"=="augologit") local countcmd=("`cmd'"=="nbreg") if !`normal' & !`binary' & !`catcmd' & !`countcmd' { di as err "invalid or unrecognised command, `cmd'" exit 198 } syntax varlist(min=1 numeric) [if] [in] [aweight fweight pweight iweight] , Gen(string) /// [ by(varlist) noCONStant Delta(string) BOot INTerval(varlist min=1 max=2) MAtch nopp noVERbose /// REPLACE REStrict(string) rn(int 0) SEed(int 0) TOTALweight(passthru) TIMESweight(passthru) /// matchtype(int 1) lrd MATCHPool(int 0) * ] /* Option LRD added to perform Local Residual Draws. TPM 21/10/10 Option matchtype takes values 1 (default) to use b for observed regression, 2 to use bstar and 3 to take a second bstar draw or boot estimate of b, 0 to use b for observed regression with ado-code (match_normal) instead of mata. */ if (`matchpool' > 0) & missing("`match'") { di as txt "[match not specified, so matchpool(`matchpool') ignored]" local matchpool 0 } if (`matchpool' <= 0) local matchpool if ("`match'" == "match") { if !`normal' { di as err "match not allowed with binary, categorical or count data" exit 198 } if missing("`matchpool'") local matchpool 3 } if `"`weight'"' != "" & "`boot'"!="" { di as err "boot not allowed with weights" exit 198 } if "`cmd'"=="intreg" & "`match'"!="" { // interval censored response variables di as err "match not allowed with intreg" exit 198 } capture confirm new var `gen' local rc = c(rc) if `rc' { if "`replace'"!="" drop `gen' else { di as err "`gen' already defined" exit 110 } } if "`verbose'"!="noverbose" { local verbosely noi if "`match'"=="match" { di as text "[imputing by prediction matching" _cont if !missing("`matchpool'") di " with matchpool(`matchpool')" _cont } else di as text "[imputing by drawing from conditional distribution" _cont if "`boot'"=="" di as text " without bootstrap]" else di as text " with bootstrap]" } else local verbosely qui if "`delta'"!="" & "`cmd'"!="mlogit" { /* IRW */ capture confirm number `delta' if c(rc) { di as error "invalid delta(#)" exit 198 } } if "`constant'"=="noconstant" { local options "`options' nocons" } if "`cmd'"=="intreg" { gettoken ll rest : varlist gettoken ul xvars: rest local rest } else gettoken y xvars : varlist tempvar touse // Set max number of ML iterations to 100 - more than enough for most problems set maxiter 100 quietly { marksample touse, novarlist markout `touse' `xvars' /* note: does not include `y' */ if "`restrict'"!="" { /* -restrict()- becomes part of touse when running the estimation command. */ tempvar touse_user gen byte `touse_user' = `touse' frac_restrict `touse' `restrict' // `touse' is a subsample of `touse_user' } else local touse_user `touse' tempvar bygroup if "`by'"!="" { // Sets up grouping variable corresponding to by-list markout `touse' `by', strok markout `touse_user' `by', strok egen int `bygroup' = group(`by') if `touse_user' sum `bygroup', meanonly local nby = r(max) if `nby'==0 { return local errmess "no valid observations" noisily error 2000 } } else { gen byte `bygroup' = 1 if `touse_user' local nby 1 } // Deal with weights frac_wgt `"`exp'"' `touse_user' `"`weight'"' local wgt `r(wgt)' if `seed'!=0 { set seed `seed' } if "`cmd'"=="intreg" { tempvar y gen `y' = cond(missing(`ll') & missing(`ul'), ., 0) if `touse_user'==1 // 0 is arbitrary local yvarlist `ll' `ul' } else local yvarlist `y' local type: type `y' tempvar obstype yimp Yimp xb u gen `type' `yimp' = . gen `type' `Yimp' = . gen byte `obstype' = . gen double `u' = . tempname b e V chol bstar local Nmis 0 local Nobs 0 forvalues g=1/`nby' { // start of by-group loop local ifg if `bygroup'==`g' & `touse'==1 local ifg_user if `bygroup'==`g' & `touse_user'==1 // Code types of missings: 1=non-missing y, 2=missing y, 3=other missing replace `obstype' = /// 1*(`touse_user'==1 & !missing(`y')) /// + 2*(`touse_user'==1 & missing(`y')) /// + 3*(`touse_user'==0) replace `obstype' = 3 if `bygroup'!=`g' count if `obstype'==1 local nobs = r(N) if `nobs' <= 2 { if `nobs' == 0 { local errmess no local errno 2000 } else { local errmess insufficient local errno 2001 } local errmess "`errmess' observations on which to estimate model" if `nby' > 1 local errmess "`errmess' in subgroup `i'" return local errmess = `"`errmess'"' noisily di as err `"`return(errmess)'"' exit `errno' } count if `obstype'==2 local nmis = r(N) local Nobs = `Nobs' + `nobs' local Nmis = `Nmis' + `nmis' // Fit imputation model and extract b and V capture `cmd' `yvarlist' `xvars' `wgt' `ifg', `options' local errno = c(rc) if `errno'>0 { if ("`cmd'"=="logit" | "`cmd'"=="logistic") & `errno'==2000 { local pp_cmd `cmd' return local errmess "error 2000 - may be caused by 100% perfect prediction of the response on`xvars'" noisily di as err `"`return(errmess)'"' } else if !(`errno'==430 & "`cmd'"=="ologit") error `errno' // ignore non-convergence in ologit } nagelkerke2 local r2 = r(r2) /* In the case of categorical regression, perform checks and use auglogit/augologit/augmlogit if perfect prediction detected. */ if `errno' != 2000 { pp_check `cmd' local pp_cmd `s(cmd)' } if "`pp_cmd'"!="" { `verbosely' di as txt "[perfect prediction detected: " _cont if "`pp'"!="nopp" { tempvar augvar wtvar `verbosely' di as txt "using aug`pp_cmd' to impute " as res "`yvarlist'" as txt "]" capture aug `yvarlist' `xvars' `wgt' `ifg', cmd(`pp_cmd') `options' /// augvar(`augvar') wtvar(`wtvar') `totalweight' `timesweight' clean_aug `augvar' `wtvar' nagelkerke2 local r2 = r(r2) } else `verbosely' di as text "no action taken]" } // !! not sure if this bit is in the right place - maybe after bootstrap fit if `catcmd' { if `g'==1 tempname cat /* mlogit version 10 outputs no. of categories in e(out) and base category in e(baseout). mlogit version 9.x outputs these in both e(out) and e(cat), similarly for base category. Use e(out) and e(baseout) for compatibility with version 10. ologit version 9.x and 10 outputs only in e(cat) and e(basecat). */ local cat_out = cond("`cmd'" == "mlogit", "out", "cat") local nclass = e(k_`cat_out') // number of classes in (ordered) categoric variable matrix `cat' = e(`cat_out') // row vector giving actual category values local cuts = `nclass' - 1 } if "`boot'" == "" { // Proper imputation of beta and rmse drawbeta, cmd(`cmd') matrix `b' = r(b) matrix `V' = r(V) local colsofb = r(colsofb) matrix `bstar' = r(bstar) if `normal' local rmsestar = r(rmsestar) } else { // Draw boot sample and refit model drawbeta, cmd(`cmd') nodraw matrix `b' = r(b) matrix `V' = r(V) local colsofb = r(colsofb) if `g'==1 { tempvar wt gen long `wt' = . } bsample if `obstype' == 1, weight(`wt') // If pp detected and fixed on original data, do same on boot sample if "`pp_cmd'" != "" & "`pp'" != "nopp" { drop if `augvar' capture aug `yvarlist' `xvars' `wgt' [fweight = `wt'] `ifg', cmd(`pp_cmd') `options' /// augvar(`augvar') wtvar(`wtvar') `totalweight' `timesweight' clean_aug `augvar' `wtvar' } else { capture `cmd' `yvarlist' `xvars' [fweight = `wt'] `ifg', `options' } drawbeta, cmd(`cmd') nodraw matrix `bstar' = r(b) if `normal' local rmsestar = r(rmse) } if `binary' | `catcmd' { if ("`boot'" == "") & ("`cmd'" != "penlogit") { // Evaluate normal approximation in categorical case if `g'==1 tempname normapprox matrix `normapprox' = (`bstar' - `b') * syminv(`V') * (`bstar' - `b')' noisily catlik, b(`b') local llhat = r(loglik) noisily catlik, b(`bstar') local llstar = r(loglik) local diagnostic = `llstar' - `llhat' + `normapprox'[1, 1] / 2 `verbosely' di as text "[true/approx likelihood = " %5.3f as res exp(`llstar' - `llhat') /// as text "/" %5.3f as res exp(-`normapprox'[1,1]/2) /// as text " = " %5.3f as res exp(`diagnostic') as text "]" } // Remove augmented observations, if perfect prediction has had to have been dealt with if "`augvar'" != "" { drop if `augvar' if `g' == `nby' drop `augvar' `wtvar' } } // Draw y, based on Ian White's code to implement van Buuren et al (1999). replace `u' = uniform() if `normal' | `binary' { // in normal or binary case, impute by sampling conditional distribution // or in normal case, by PMM if "`match'" == "match" | "`matchpool'"!="" { // predictive mean matching - only allowed for `normal' data if `g' == 1 { tempvar etaobs etamis if ("`lrd'" == "lrd") tempvar etaobsfull /* etaobsfull added by TPM 21/10/10 */ } else { if ("`lrd'" == "lrd") drop `etaobsfull' drop `etaobs' `etamis' } matrix score `etamis' = `bstar' if `obstype' == 2 // Include non-response location shift, delta. if "`delta'"!="" { replace `etamis' = `etamis' + `delta' } if `matchtype' <= 1 { // default is 1 // etaobsfull is required to use LRD option. TPM 21/10/10 if ("`lrd'" == "lrd") matrix score `etaobsfull' = `b' matrix score `etaobs' = `b' if `obstype' == 1 } else if `matchtype' == 2 { if ("`lrd'" == "lrd") matrix score `etaobsfull' = `bstar' /* TPM 21/10/10 */ matrix score `etaobs' = `bstar' if `obstype' == 1 } else if `matchtype' == 3 { // take a new draw for etaobs if `g' == 1 tempname bstar2 if "`boot'" == "" { drawbeta, cmd(`cmd') matrix `bstar2' = r(bstar) } else { bsample if `obstype' == 1, weight(`wt') capture `cmd' `yvarlist' `xvars' [fweight = `wt'] `ifg', `options' drawbeta, cmd(`cmd') nodraw matrix `bstar2' = r(b) } if ("`lrd'" == "lrd") matrix score `etaobsfull' = `bstar2' /* TPM 21/10/10 */ matrix score `etaobs' = `bstar2' if `obstype' == 1 } sort `obstype' `etaobs', stable if `matchtype' == 0 & "`matchpool'"=="" { // use ado-code for pmm match_normal `obstype' `nobs' `nmis' `etaobs' `etamis' `yimp' `y' } else { // use Mata for pmm tempvar resimp gen `resimp' = . capture noi mata: _matchpool_normal_mata(`nmis', `matchpool') if c(rc) { noi di as err "_matchpool_normal_mata execution error" exit c(rc) } if ("`lrd'" == "lrd") { // Replaces PMM-imputed value with a LRD. TPM 21/10/10 cap confirm var `resimp' // IRW 6dec2010 if _rc di as error "LRD error: variable resimp does not exist" // IRW 6dec2010 replace `yimp' = `etaobsfull' + `resimp' if `obstype' == 2 // IRW 6dec2010 } if (`rn' > 0) replace `yimp' = `yimp' + sqrt(`rmsestar') * rnormal() if `obstype' == 2 } } else { // sampling conditional distribution capture drop `xb' matrix score `xb' = `bstar' `ifg_user' if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' if "`delta'"!="" { replace `xb' = `xb'+`delta' if `obstype'==2 } if `normal' { if "`cmd'"=="intreg" { if `g'==1 tempvar PhiA PhiB else drop `PhiA' `PhiB' gen double `PhiA' = cond(missing(`ll'), 0, norm((`ll'-`xb')/`rmsestar')) gen double `PhiB' = cond(missing(`ul'), 1, norm((`ul'-`xb')/`rmsestar')) replace `yimp' = `xb'+`rmsestar'*invnormal(`u'*(`PhiB'-`PhiA')+`PhiA') } else replace `yimp' = `xb'+`rmsestar'*invnormal(`u') } else replace `yimp' = (`u'0, lambda->B. */ matrix score `xb' = `bstar' `ifg' if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' if `g'==1 { tempname bb variance } matrix `bb' = `bstar'[1,"lnalpha:_cons"] scalar `variance' = exp(2 * `bb'[1,1]) drop `yimp' if `variance' > 0.00001 { local parm1 = 1 / `variance' local parm2 = 1 / (1 + exp(`xb') * `variance') if (`parm1' < 0.1) | (`parm1' > 100000) { di as err "First parameter out of range in rnbinomial(`parm1', `parm2')" exit 498 } count if (`parm2' < 0.0001) | (`parm2' > 0.9999) if r(N) { di as error "Second parameter out of range in rnbinomial(`parm1', `parm2')" exit 498 } gen `yimp' = rnbinomial(`parm1', `parm2') } else gen `yimp' = rpoisson(exp(`xb')) replace `yimp' = 0 `ifg' & missing( `yimp' ) drop `xb' } else { // catcmd // draw: sample conditional distribution replace `yimp' = `cat'[1,1] if "`cmd'"=="ologit" { // Predict index independent of cutpoints // (note use of forcezero option to circumvent missing _cut* vars) matrix score `xb' = `bstar' `ifg_user', forcezero if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' if "`delta'"!="" { replace `xb' = `xb'+`delta' if `obstype'==2 } forvalues k=1/`cuts' { // invlogit(...) is probability of being in category 1 or 2 or ... k local cutpt = `bstar'[1, `k'+`colsofb'-`cuts'] replace `yimp' = `cat'[1,`k'+1] if `u'>invlogit(`cutpt'-`xb') } drop `xb' } else { // mlogit // care needed dealing with different possible base categories if `g'==1 tempvar cusump sumexp else drop `cusump' `sumexp' local basecat = e(baseout) // actual basecategory chosen by Stata gen `sumexp' = 0 `ifg_user' if "`delta'"!="" { numlist "`delta'" local deltalist `r(numlist)' if word("`deltalist'",`nclass'+1)!="" { di as error "Length of delta() is greater than #categories" exit 198 } } // mlogit counts equation numbers from 1 and excludes the basecat local eqno 0 forvalues i=1/`nclass' { if `g'==1 tempvar xb`i' else drop `xb`i'' local thiscat = `cat'[1,`i'] if `thiscat'==`basecat' { gen `xb`i'' = 0 `ifg_user' } else { local ++eqno matrix score `xb`i'' = `bstar' `ifg_user', equation(#`eqno') } if "`delta'"!="" { local thisdelta: word `i' of `deltalist' capture confirm number `thisdelta' if _rc { di as error "Length of delta() is less than #categories" exit 198 } replace `xb`i'' = `xb`i''+`thisdelta' } replace `sumexp' = `sumexp' + exp(`xb`i'') } gen `cusump' = exp(`xb1')/`sumexp' forvalues i=2/`nclass' { replace `yimp' = `cat'[1,`i'] if `u'>`cusump' replace `cusump' = `cusump'+exp(`xb`i'')/`sumexp' replace `yimp' = . if missing(`xb`i'') } } } replace `Yimp' = `yimp' if `bygroup'==`g' } // end of by-group loop rename `Yimp' `gen' if "`cmd'"=="intreg" replace `gen' = `ll' if !missing(`ll') & `ll'==`ul' // uncensored else replace `gen' = `y' if !missing(`y') lab var `gen' "imputed from `yvarlist'" } `verbosely' di _n as res `Nmis' as txt " missing observations on " as res "`yvarlist'" /// as txt " imputed from " as res `Nobs' as txt " complete observations." return local pp_cmd `pp_cmd' return matrix b = `b' return matrix V = `V' return matrix bstar = `bstar' if `normal' return scalar rmsestar = `rmsestar' return scalar r2 = `r2' * 100 end program define _augment, rclass version 9.2 syntax varlist [if] [in] [fweight pweight aweight iweight], [TOTALweight(real 0) TIMESweight(real 0) /// noPREServe list wtvar(string) augvar(string) usevar(string) noequal] if "`wtvar'"=="" local wtvar _weight if "`augvar'"=="" local augvar _augment capture confirm new var `wtvar' if c(rc) drop `wtvar' capture confirm new var `augvar' if c(rc) drop `augvar' marksample touse gettoken y xlist: varlist // Remove collinearities _rmcoll `xlist' if `touse' local xlist `r(varlist)' local nx: word count `xlist' if `nx' == 0 { local errmess "insufficient predictors to perform perfect prediction" noisily di as err `"`errmess'"' return local errmess = "`errmess'" exit 2001 } tempvar augment wt if "`weight'"!="" gen `wt' `exp' else gen `wt' = 1 qui levelsof `y' if `touse', local(ylevels) qui tab `y' if `touse' & `wt'>0 /* IRW 12may2008 */ local nylevels = r(r) /* IRW 12may2008 */ qui summ `y' if `touse' [iweight=`wt'], meanonly /* IRW 12may2008 */ local totw = r(N) /* IRW 12may2008 */ local Nold = _N // Set total weight of added observations if `totalweight'>0 & `timesweight'>0 { di as error "Can't specify totalweight() and timesweight()" exit 498 } if `totalweight' == 0 { local totalweight = `nx' + 1 /*IRW: matches Clogg++91 and Firth93 */ if `timesweight'>0 local totalweight = `totalweight' * `timesweight' } /* global augmented `totalweight' */ // Number of added observations local Nadd = `nx'*2*`nylevels' local Nnew = `Nold'+`Nadd' di as text "Adding " as result `Nadd' as text " pseudo-observations with total weight " as result `totalweight' quietly { set obs `Nnew' gen `augment' = _n>`Nold' gen byte `usevar' = `touse' | `augment' // picks up augmented observations local thisobs `Nold' foreach x of var `xlist' { sum `x' [iweight=`wt'] if `touse' & !`augment' local mean = r(mean) local sd = r(sd) * sqrt((r(N) - 1) / r(N)) replace `x' = `mean' if `augment' foreach yval in `ylevels' { sum `wt' if `y' == `yval' & `touse' & !`augment', meanonly local py = cond("`equal'" == "noequal", r(sum) / `totw', 1 / `nylevels') foreach xnewstd in -1 1 { local ++thisobs replace `x' = `mean' + (`xnewstd') * `sd' in `thisobs' replace `y' = `yval' in `thisobs' replace `wt' = `totalweight' * `py' / (2 * `nx') in `thisobs' } } } if "`e(offset)'"!="" { sum `e(offset)' if `touse' replace `e(offset)' = r(mean) if `augment' } } rename `wt' `wtvar' rename `augment' `augvar' if "`list'" == "list" { di as txt _newline "Listing of added records:" list `xlist' `y' `wtvar' if `augvar', sep(4) } // In case of collinearity, return the trimmed list of predictors return local varlist `y' `xlist' end program define aug, rclass syntax varlist [if] [in] [fweight pweight], cmd(string) [TOTALweight(passthru) /// TIMESweight(passthru) wtvar(string) augvar(string) equal *] if "`wtvar'"=="" tempvar wtvar if "`augvar'"=="" tempvar augvar if "`weight'"!="" local weightexp [`weight'`exp'] tempvar touse _augment `varlist' `if' `in' `weightexp', `totalweight' `timesweight' `equal' wtvar(`wtvar') augvar(`augvar') usevar(`touse') local varlist `r(varlist)' // old varlist minus collinear vars if present local wttype=cond("`weight'"=="pweight","pw","iw") `cmd' `varlist' if `touse' [`wttype'=`wtvar'], `options' // `cmd' is logit, ologit or mlogit - returned in `s(cmd)' by pp_check end program define pp_check, sclass version 9.2 /* Check for perfect prediction in logistic regression. If found, use aug to fit model. */ args cmd sreturn clear if "`cmd'"=="logit" | "`cmd'"=="logistic" { tempname erules // Check if rules invoked or perfect prediction found matrix `erules' = e(rules) local c = colsof(`erules') local nperfect = e(N_cdf)+e(N_cds) if missing(`nperfect') local nperfect 0 forvalues nc = 1/`c' { local nperfect = `nperfect'+(`erules'[1, `nc'] > 0) } if `nperfect'>0 { // rules or perfect prediction found - will invoke auglogit sreturn local cmd logit } } else if "`cmd'"=="ologit" { // Check if perfect prediction found if e(N_cd)>0 { // perfect prediction found - will invoke augologit sreturn local cmd ologit } } else if "`cmd'"=="mlogit" { // Check if perfect prediction found levelsof `e(depvar)' if e(sample), local(ylevels) tempvar p foreach level in `ylevels' { predict `p', outcome(`level') sum `p', meanonly if r(min)<1E-9 { // perfect prediction - will invoke augmlogit sreturn local cmd mlogit continue, break } drop `p' } } end program define catlik, rclass // Return log-likelihoods for logit/mlogit/ologit at a given parameter vector b syntax, b(string) local y `e(depvar)' local cmd `e(cmd)' tempvar wt if "`e(wtype)'"!="" gen double `wt' `e(wexp)' else gen double `wt'=1 /* if "`e(offset)'"!="" { di as error "catlik: offset not allowed" exit 498 } */ quietly { if "`cmd'"=="mlogit" | "`cmd'"=="ologit" { tempname ecat local cat_out = cond("`cmd'" == "mlogit", "out", "cat") matrix `ecat' = e(`cat_out') local nylevels = e(k_`cat_out') tempvar ll xb psum gen double `ll' = . } if "`cmd'"=="mlogit" { gen double `psum' = 1 local eqno 1 forvalues i=1/`nylevels' { capture drop `xb' local yval = `ecat'[1,`i'] if `yval'!=e(baseout) { matrix score `xb' = `b', eq(#`eqno') if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' replace `psum' = `psum'+exp(`xb') replace `ll' = `xb' if `y' == `yval' local ++eqno } else gen double `xb' = 0 replace `ll' = `xb' if `y'==`yval' } replace `ll' = `ll' - log(`psum') } else if "`cmd'"=="ologit" { tempvar p psumlag gen double `p' = . gen double `psumlag' = 0 matrix score `xb' = `b' if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' forvalues i = 1/`nylevels' { local yval = `ecat'[1,`i'] if `i'<`nylevels' { local cut = `b'[1,colsof(`b')-`nylevels'+`i'+1] replace `p' = invlogit(`cut'-`xb')-`psumlag' replace `psumlag' = `psumlag'+`p' } else replace `p' = 1-`psumlag' replace `ll' = log(`p') if `y' == `yval' } } else if "`cmd'"=="logit" | "`cmd'"=="logistic" { tempvar ll xb matrix score `xb' = `b' if "`e(offset)'"!="" replace `xb' = `xb'+`e(offset)' gen double `ll' = (`y'!=0)*`xb'-log(1+exp(`xb')) } else { di as error "catlik doesn't work after regression command `cmd'" exit 498 } replace `ll' = `wt'*`ll' sum `ll' if e(sample), meanonly return scalar loglik = r(sum) } end program define drawbeta, rclass version 9.2 //Taken from uvis.ado syntax, cmd(string) [nodraw tweak(real 1E-12)] local normal = ("`cmd'" == "regress") | ("`cmd'" == "rreg") | ("`cmd'" == "intreg") tempname b V chol matrix `b' = e(b) matrix `V' = e(V) // intreg tags the lnsigma model onto the end of e(b) and e(V) - must be stripped if "`cmd'" == "intreg" { matrix `b' = `b'[1, "model:"] matrix `V' = `V'["model:", "model:"] } local p = colsof(`b') return scalar colsofb = colsof(`b') if "`draw'" == "nodraw" { if `normal' { tempname rmse if "`cmd'" == "intreg" { scalar `rmse' = exp([lnsigma]_b[_cons]) } else { scalar `rmse' = e(rmse) } return scalar rmse = `rmse' } } else { capture matrix `chol' = cholesky(`V') local rc = c(rc) if `rc' == 506 { matrix `chol' = cholesky((`tweak' * trace(`V')/`p') * I(`p') + `V') } else if `rc' > 0 { di as err "Cholesky decomposition failed with error " `rc' exit `rc' } if `normal' { // draw rmse tempname rmse df chi2 rmsestar if "`cmd'" == "intreg" { scalar `rmse' = exp([lnsigma]_b[_cons]) scalar `df' = e(N) - `p' } else { scalar `rmse' = e(rmse) scalar `df' = e(df_r) } scalar `chi2' = 2 * invgammap(`df' / 2, uniform()) scalar `rmsestar' = `rmse' * sqrt(`df' / `chi2') matrix `chol' = `chol' * sqrt(`df' / `chi2') return scalar rmsestar = `rmsestar' } // draw beta tempname e bstar matrix `e' = J(1, `p', 0) forvalues i = 1 / `p' { matrix `e'[1, `i'] = invnormal(uniform()) } matrix `bstar' = `b' + `e' * `chol'' return matrix bstar = `bstar' return matrix chol = `chol' } return matrix b = `b' return matrix V = `V' end program define match_normal * Prediction matching, normal or binary case. args obstype nobs nmis etaobs etamis yimp y quietly { * For each missing obs j, find index of observation * whose etaobs is closest to etamis[j]. tempvar sumgt tempname etamisi gen long `sumgt'=. * Sort etaobs within obstype sort `obstype' `etaobs', stable forvalues i=1/`nmis' { local j=`i'+`nobs' scalar `etamisi'=`etamis'[`j'] replace `sumgt'=sum((`etamisi'>`etaobs')) in 1/`nobs' sum `sumgt', meanonly local j1=r(max) if `j1'==0 { local index 1 local direction 1 } else if `j1'==`nobs' { local index `nobs' local direction -1 } else { local j2=`j1'+1 * if (`etamisi'-`etaobs'[`j1'])<(`etaobs'[`j2']-`etamisi') { if (2*`etamisi') < (`etaobs'[`j1']+`etaobs'[`j2']) { // slightly faster local index `j1' local direction -1 } else { local index `j2' local direction 1 } } * In case of tied etaobs values, add random offset to index in the appropriate direction * count if `obstype'==1 & reldif(`etaobs', `etaobs'[`index'])<1e-7 // counts as equality count if `obstype'==1 & `etaobs'==`etaobs'[`index'] // slightly faster if r(N)>1 { local index=`index'+`direction'*int(uniform()*r(N)) } replace `yimp'=`y'[`index'] in `j' } } end program define clean_aug version 9.2 // Clean up after failed aug command args augvar wtvar local rc = c(rc) if `rc' > 0 { drop if `augvar' drop `augvar' `wtvar' error `rc' } end program define nagelkerke2, rclass version 9.2 tempvar touse cap gen byte `touse' = e(sample) if c(rc) error 301 local cmd `e(cmd2)' if missing("`cmd'") local cmd `e(cmd)' if inlist("`cmd'", "stcox", "cox", "streg", "stpm") { qui count if (_d == 1) & (e(sample) == 1) local n = r(N) local obs events } else { qui count if (e(sample) == 1) local n = r(N) local obs observations } if missing(e(ll_0)) local r2 = 1 - exp(-1 * e(chi2) / `n') else local r2 = 1 - exp(-2 * (e(ll) - e(ll_0)) / `n') di as txt _n "Nagelkerke R-squared = " as res %9.6f `r2' as txt " (" as res `n' as txt " `obs')" return scalar r2 = `r2' return scalar N = `n' end version 9.2 mata: mata set matastrict on void _matchpool_normal_mata(real scalar nmis, real scalar k) { real colvector etaobs, etamis, y, index, dif, Wsum real matrix W real scalar i, i0, nobs, neta, ind, yimpind // get variable index of temporary variable yimp yimpind = st_varindex(st_local("yimp")) resimp = st_varindex(st_local("resimp")) // IRW 6dec2010 // create views to store other variables st_view(etaobs=.,.,st_local("etaobs")) st_view(etamis=.,.,st_local("etamis")) st_view(y=.,.,st_local("y")) // select only nonmissing values of etaobs etaobs = select(etaobs, (etaobs:!=.)) neta = rows(etaobs) if (neta==0) { errprintf("_matchpool_normal_mata(): missing predictions encountered\n") exit(498) } // nobs are the effective number of observations, ignoring obstype 3 ones nobs = neta + nmis i0 = neta + 1 // looping over missing values for (i=i0; i<=nobs; i++) { if (etamis[i]==.) { errprintf("_matchpool_normal_mata(): missing predictions encountered\n") exit(498) } // find k predictions closest to etamis[i] dif = etaobs:-etamis[i] minindex(abs(dif), k, index, W) Wsum = runningsum(W[.,2]) s = sum(Wsum:<=k) ind = trunc(k*uniform(1,1))+1 // Start of Ian's new code to replace // if (ind>Wsum[s]) ind = Wsum[s] + trunc(W[s+1,2]*uniform(1,1)) + 1 if (s>0) Wsums=Wsum[s] else Wsums=0 unif=uniform(1,1) if (ind>Wsums) // End of Ian's new code ind = Wsums + trunc(W[s+1,2]*unif) + 1 // store imputed values in temporary variable yimp _st_store(i, yimpind, y[index[ind]]) // and store imputed residuals in temporary variable resimp, IRW 6dec2010 _st_store(i, resimp, y[index[ind]]-etaobs[index[ind]]) // IRW 6dec2010 } } end exit History 1.7.1 17jan2011 Fixes to LRD with matchpool() (IRW). Removed match_normal_mata() - redundant. 1.7.0 02dec2010 Added undocumented option -lrd- to implement LRD (Tim Morris). 1.6.1 13jul2010 Bug in parsing/use of match/matchpool() fixed. 1.6.0 10jun2010 -matchpool()- improved. 1.5.9 26apr2010 Added support for PMM based on Schenker and Taylor 1996 algorithm (new -matchpool()- option). 1.5.8 02dec2009 Allowed any variable (not just in mainvarlist) in right hand side of passive() expressions. Experimental option rn(#) to add random normal noise to match imputations 1.5.7 19nov2009 Added Nagelkerke R2 to -debug- option. Improved support for -uvis penlogit-. 1.5.6 21oct2009 Added support for e(offset). Fixed bug in support for negative binomial. 1.5.5 13may2009 Fixed bug in by() with perfect prediction. 1.5.4 05may2009 Fixed bug in subprogram aug - missing `if' `in'. 1.5.3 21apr2009 Tidy up after perfect prediction - otherwise could get the augmented observations. Issue with weights in augment sorted out. 1.5.2 16apr2009 Fixed bug with use of mlogit - sometimes failed to pick up correct equations. 1.5.1 14feb2009 Fixed bug in _match_normal_mata(). In the case of ties, the command could perform a random shift in the wrong direction. This was due to the inconsistent definition of ties used to compute the number of ties ( reldif(a,b)<1e-7 is used) and to determine the indices of ties (a==b was implied). This has been fixed. 1.5.0 11feb2009 Major code tidy-up. 1.4.4 19dec2008 Fixed precision issue that allowed uniform random numbers to be 0 or 1 in rare cases. 1.4.3 02oct2008 Minor bug fixes and refinements. 1.4.2 19sep2008 Further refinements to modelling perfect prediction Fixed bug in perfect prediction with by() 1.4.1 11sep2008 Fix bug in match option - Mata code did not allow missing observations (obstype=3) Add timesweight() and totalweight(); allowed penalogit (IRW). by() option added to do imputation within levels of a byvarlist (no)verbose option added to suppress messages 1.4.0 17jun2008 Add restrict() option to assist out-of-sample computations. Add cmd(nbreg) for count data (Ian White). 1.3.0 14may2008 Major speed improvements to match_normal, by recoding in Mata (thanks Yulia Marchenko). 1.2.8 12may2008 Minor speed improvements to match_normal, including remove unnecessary scalars count*. Bug in _augment when dealing with weights of different types fixed 1.2.7 04dec2007 Likelihood check for -logit-, -ologit- and -mlogit- from Ian White. 1.2.6 10nov2007 Updated version of -auglogit-, -augologit-, -augmlogit- from Ian White, changes # of augmented cases. 1.2.5 10oct2007 Updated version of -auglogit- from Ian White, also including -augologit-, -augmlogit-. nopp option added to suppress avoidance of perfect prediction bug. 1.2.4 31jul2007 Improved version of -auglogit- from Ian White. 1.2.3 30mar2007 Minor changes to -auglogit- and -pp_check- from Ian White. 1.2.2 27mar2007 Check for perfect prediction in logistic regression; new routine pp_check. Replace log(p/(1-p)) with logit(p), similarly for invlogit. 1.2.1 15dec2006 Allow -auglogit- to deal with perfect prediction in logistic regression. 1.2.0 30jun2006 Interval censoring implemented via interval() option. 1.1.0 03aug2005 Replace -draw- option with -match-. Default becomes draw. With prediction matching, randomly sort observations with identical predictions. Order variables in chained equations in order of increasing missingness. 1.0.4 21jun2005 Add sort, stable to enable reproducibility imputations with given seed