*! Program to apply Rubin's rule to estimation results from multiply-imputed datasets *! Assumes that you have estimations results stored as `stub'1, `stub'2, etc. *! Version 1.0.1 capture program drop mirubin capture mata: mata drop rubin() program define mirubin, eclass version 12 syntax , [stub(name) Imputations(numlist) force repost noSMALL ] if "`stub'" == "" { if "`e(cmd)'" == "mirubin" { _coef_table // for -est replay- exit } else { error 301 } } if "`imputations'" == "" { local i = 0 capture while _rc == 0 { local i=`i'+1 capture est dir `stub'`i' } if `i' == 1 { di as err "Cannot find matrix {res:``type'stub'`i'}" exit 999 } else local nimp = `i' - 1 numlist "1/`nimp'" local imputations `r(numlist)' } local nlist `imputations' local nimp : word count `imputations' gettoken firstn otherns : imputations if "`force'" == "" { // Check if estimation results have the same e(N) and that e(b) and e(V) have the same colnames. // Check also if e(depvar) and e(cmd) the same across results // Check also whether e(converged) == 1 in all results. qui est restore `stub'`firstn' local colnames : colfullnames e(b) local n = e(N) local converged = e(converged) if `converged' == 0 { di as err "Results not converged in `stub'`firstn'" local error yes } local depvar `e(depvar)' local cmd `e(cmd)' foreach imp in `nlist' { qui est restore `stub'`imp' // Check colnames/rownames of e(b), e(V) foreach type in e(b) e(V) { local testcolnames : colfullnames `type' if "`testcolnames'" != "`colnames'" { di as err "The column names of `type' in `stub'`firstn' is:" di as res "`colnames'" di as err "but the column names of `type' in `stub'`imp' is:" di as res "`testcolnames'" local error yes } } // Check e(N) local testn = e(N) if `n' != `testn' { di as err "e(N) in `stub'`firstn' is: {res:`n'}" di as err "but e(N) in `stub'`imp' is: {res:`testn'}" local error yes } // Check e(converged) local converged = e(converged) if `converged' == 0 { di as err "Results not converged in `stub'`imp'" local error yes } // Check e(depvar) local testdepvar `e(depvar)' if "`testdepvar'" != "`depvar'" { di as err "e(depvar) in `stub'`firstn' is:" di as res "`depvar'" di as err "but e(depvar) in `stub'`imp' is:" di as res "`testdepvar'" local error yes } // Check e(cmd) local testcmd `e(cmd)' if "`testcmd'" != "`cmd'" { di as err "e(cmd) in `stub'`firstn' is:" di as res "`cmd'" di as err "but e(cmd) in `stub'`imp' is:" di as res "`testcmd'" local error yes } } if "`error'" == "yes" exit 999 } // Restore the first result (in case the repost option is specified) qui est restore `stub'`firstn' local colnames : colfullnames e(b) local n = e(N) local depvar `e(depvar)' local cmd `e(cmd)' // Clarify which results we've got di as txt "Estimations found:" local estnames `""`stub'`firstn'""' di as res"`stub'`firstn'" foreach imp in `otherns' { local estnames `"`estnames', "`stub'`imp'""' di as res "`stub'`imp'" } // Ok... Run mata: rubin() tempname bmat Vmat Bmat Wmat RVI df fmi re pise mata: rubin((`estnames'), "`bmat'", "`Vmat'", "`Bmat'", "`Wmat'", "`RVI'", "`df'", "`fmi'", "`re'", "`pise'"); if "`colnames'" == "" local colnames : colfullnames e(b) // di "`colnames'" foreach mat in bmat Vmat Bmat Wmat RVI df fmi re pise { matrix colnames ``mat'' = `colnames' } foreach mat in Vmat Bmat Wmat { matrix rownames ``mat'' = `colnames' } // Save copy of bmat and Vmat tempname bmat2 Vmat2 matrix `bmat2' = `bmat' matrix `Vmat2' = `Vmat' if "`repost'" == "repost" ereturn repost b = `bmat' V = `Vmat' else ereturn post `bmat' `Vmat' // ereturn display ereturn matrix b_mi = `bmat2' ereturn matrix V_mi = `Vmat2' ereturn matrix B_mi = `Bmat' ereturn matrix W_mi = `Wmat' ereturn matrix rvi_mi = `RVI' ereturn matrix df_mi = `df' ereturn matrix fmi_mi = `fmi' ereturn matrix re_mi = `re' ereturn matrix pise_mi = `pise' ereturn local cmd mirubin ereturn local depvar `depvar' ereturn local mi mi _coef_table end mata: void rubin(string rowvector matnames, string scalar bmatname, string scalar Vmatname, string scalar Bmatname, string scalar Wmatname, string scalar RVIname, string scalar dfname, string scalar fminame, string scalar rename, string scalar pisename) { n = length(matnames); for(i=1;i<=n;i++) { command = "qui est restore " + matnames[i] stata(command); b = st_matrix("e(b)"); V = st_matrix("e(V)"); df_r = st_numscalar("e(df_r)") if (i==1) { _b = b; _V_within = V; _df_r = df_r; } else { _b = (_b \ b) ; _V_within = _V_within + V; _df_r = _df_r \ df_r; } } _b_mean = colsum(_b) :/ n ; _V_within = _V_within :/ n _b_centred = _b :- J(n, 1,1) * _b_mean _V_between = _b_centred' * _b_centred :/ (n - 1) VV = _V_within + (1 + 1/n) :* _V_between ; bb = _b_mean ; st_matrix(bmatname, bb) ; st_matrix(Vmatname, VV) ; st_matrix(Bmatname, _V_between) ; st_matrix(Wmatname, _V_within) ; // Calculate RVI: Relative Variance Increase B = diagonal(_V_between)' Ubar = diagonal(_V_within)' T = diagonal(VV)' RVI = B :/ Ubar :* (1 + 1/n) st_matrix(RVIname, RVI) // Calculate Degree of freedom df = (n - 1) :* (1 :+ 1 :/ RVI):^2 st_matrix(dfname, df) // FMI: Fraction of information due to nonresponse FMI = (RVI + 2 :/ (df :+ 3)) :/ (RVI :+ 1); st_matrix(fminame, FMI); // Relative efficiency (of M imputations vs. infinite) re = 1 :/ (1 :+ FMI :/ n) st_matrix(rename, re) // PISE: Percentage increase in Standard error pise = (sqrt(T :/ Ubar) :- 1) :* 100 st_matrix(pisename, pise); /* small sample */ nosmall = st_local("small") _DF_R = min(_df_r); if (nosmall == "" & _DF_R < .) { // df gamma = (1 + 1/n) :* B :/ T nuc = _DF_R * (_DF_R + 1) :* (1 :- gamma) :/ (_DF_R +3) df_small = 1:/(1 :/ df + 1 :/ nuc) st_matrix(dfname, df_small) // FMI numerator = (df_small :+1) :/ (df_small :+3) :* Ubar denom = (_DF_R +1) / (_DF_R + 3) :* T FMI_small = 1 :- numerator :/ denom st_matrix(fminame, FMI_small) // Relative efficiency re_small = 1 :/ (1 :+ FMI_small :/ n) st_matrix(rename, re_small) }; } end