*! 1.0.0 Ariel Linden 18Apr2026 program define mlm_gof, rclass version 16 syntax [, Groups(integer -1)] local ngroups = `groups' // clean up any stale tempvar group indicator variables left by a previously failed run capture quietly drop __0000* // verify that melogit was the last estimation model local iscmd = ("`e(cmd)'" == "meglm") local iscmd2 = ("`e(cmd2)'" == "melogit") if !`iscmd' & !`iscmd2' { di as error "the current estimation results must be from {bf:melogit}" exit 321 } local depvar = e(depvar) local indepvars = e(covariates) local all_ivars = e(ivars) local n_levels = wordcount("`all_ivars'") + 1 if `n_levels' < 2 { di as error "mlm_gof requires a multilevel model with at least two levels" exit 301 } // reconstruct the random-effects specification from e(b) and e(cmdline) local cmdline = e(cmdline) local cmdline = strtrim(substr("`cmdline'", length("melogit ") + 1, .)) local pipe_pos = strpos("`cmdline'", " || ") if `pipe_pos' == 0 { di as error "Could not parse e(cmdline): no '||' found" exit 301 } local repart = strtrim(substr("`cmdline'", `pipe_pos' + 4, .)) local scan_rem "`repart'" while "`scan_rem'" != "" { // get next || segment local scan_pipe = strpos("`scan_rem'", " || ") if `scan_pipe' > 0 { local scan_seg = strtrim(substr("`scan_rem'", 1, `scan_pipe' - 1)) local scan_rem = strtrim(substr("`scan_rem'", `scan_pipe' + 4, .)) } else { local scan_seg = strtrim("`scan_rem'") local scan_rem = "" } // extract options after the comma in this segment local scan_comma = strpos("`scan_seg'", ",") if `scan_comma' > 0 { local seg_opts = strtrim(substr("`scan_seg'", `scan_comma' + 1, .)) // keep only options that are NOT cov(...) -- those are RE-specific if substr(strtrim("`seg_opts'"), 1, 4) != "cov(" { // no cov() prefix: all options after comma are global local est_opts = strtrim("`est_opts' `seg_opts'") } else { // cov() present: strip it and keep the rest local cov_end = strpos("`seg_opts'", ")") if `cov_end' < length("`seg_opts'") { local after_cov = strtrim(substr("`seg_opts'", `cov_end' + 1, .)) // strip leading comma if present if substr("`after_cov'", 1, 1) == "," { local after_cov = strtrim(substr("`after_cov'", 2, .)) } local est_opts = strtrim("`est_opts' `after_cov'") } } } } // parse cov() option from each || segment local n_re_segs = 0 local remaining "`repart'" while "`remaining'" != "" { local next_pipe = strpos("`remaining'", " || ") if `next_pipe' > 0 { local this_seg = strtrim(substr("`remaining'", 1, `next_pipe' - 1)) local remaining = strtrim(substr("`remaining'", `next_pipe' + 4, .)) } else { local this_seg = strtrim("`remaining'") local remaining = "" } local ++n_re_segs // extract ONLY the cov() option for this segment local re_opt`n_re_segs' = "" local comma_pos = strpos("`this_seg'", ",") if `comma_pos' > 0 { local seg_after = strtrim(substr("`this_seg'", `comma_pos' + 1, .)) // keep only cov(...) -- it is RE-specific and belongs in re_spec if substr("`seg_after'", 1, 4) == "cov(" { // extract just the cov(...) token local cov_end = strpos("`seg_after'", ")") local re_opt`n_re_segs' = ", " + substr("`seg_after'", 1, `cov_end') } // non-cov options are global and already in est_opts -- skip them here } } // parse e(b) var() entries to get slope TERMs per level tempname b_mat matrix `b_mat' = e(b) local np = colsof(`b_mat') local n_reterms = 0 local re_gvars_abbr = "" // abbreviated groupvars from e(b), for matching forvalues k = 1/`np' { local nm : word `k' of `: colnames `b_mat'' if substr("`nm'", 1, 4) == "var(" { local ++n_reterms // parse var(TERM[GROUPVAR_abbr]) -> extract TERM and abbreviated GROUPVAR local inner = substr("`nm'", 5, length("`nm'") - 5) local brack = strpos("`inner'", "[") local re_term`n_reterms' = strtrim(substr("`inner'", 1, `brack' - 1)) local gv_abbr = strtrim(substr("`inner'", `brack' + 1, length("`inner'") - `brack' - 1)) local re_gvar_abbr`n_reterms' = "`gv_abbr'" // track unique abbreviated groupvars in order (for position matching) if !`:list gv_abbr in re_gvars_abbr' { local re_gvars_abbr = "`re_gvars_abbr' `gv_abbr'" } } } // reassemble re_spec using FULL groupvar names from e(ivars) local seg_idx = 0 local n_abbr_gvars : word count `re_gvars_abbr' forvalues seg = 1/`n_abbr_gvars' { local ++seg_idx local gv_abbr : word `seg' of `re_gvars_abbr' // full groupvar name from e(ivars) by position local gv_full : word `seg' of `all_ivars' // collect slope terms for this groupvar (exclude _cons) local slopes_this = "" forvalues t = 1/`n_reterms' { if "`re_gvar_abbr`t''" == "`gv_abbr'" & "`re_term`t''" != "_cons" { // use the expanded term directly (e.g. 1.urban) local slopes_this = "`slopes_this' `re_term`t''" } } local slopes_this = strtrim("`slopes_this'") // get cov() option for this segment from e(cmdline) parse local opt_this = "`re_opt`seg_idx''" // build: || full_groupvar: [slopes] [, cov(...)] local re_spec = "`re_spec' || `gv_full': `slopes_this'`opt_this'" } local re_spec = strtrim("`re_spec'") // cluster size and group number checks tempvar touse quietly gen byte `touse' = e(sample) quietly count if `touse' local N = r(N) if `ngroups' != -1 { if `ngroups' < 2 { di as error "groups() must be >= 2" exit 198 } if `ngroups' >= floor(`N' / 10) { di as error "groups(`ngroups') too large for n=`N'; must be < " /// floor(`N' / 10) exit 198 } } local _any_warn = 0 local _nwarn = 0 if `n_levels' > 2 { forvalues _lv = 1/`=`n_levels'-2' { local _outer = word("`all_ivars'", `_lv') local _inner = word("`all_ivars'", `_lv' + 1) local _odepth = `n_levels' - `_lv' + 1 local _idepth = `n_levels' - `_lv' tempvar _n_inner quietly bysort `touse' `_outer': /// egen long `_n_inner' = nvals(`_inner') if `touse' quietly summarize `_n_inner' if `touse', meanonly local _min_inner = r(min) local _max_inner = r(max) if `_max_inner' < 2 { di as error "Every level-`_odepth' cluster contains only one level-`_idepth' unit" di as error " The test cannot be run." exit 459 } if `_min_inner' < 2 { local _any_warn = 1 local _nwarn = `_nwarn' + 1 local _warn`_nwarn' "Warning: Some level-`_odepth' clusters contain only one level-`_idepth' unit." local _warn`_nwarn'b " Results should be interpreted with caution." } else if `_min_inner' < 5 { local _any_warn = 1 local _nwarn = `_nwarn' + 1 local _warn`_nwarn' "Warning: Some level-`_odepth' clusters contain fewer than 5 level-`_idepth' units (minimum = `_min_inner')." local _warn`_nwarn'b " Results should be interpreted with caution." } } } tempvar _cell_n_check quietly bysort `touse' `all_ivars': gen long `_cell_n_check' = _N if `touse' quietly summarize `_cell_n_check' if `touse', meanonly local min_cell_n = r(min) if `ngroups' == -1 { // data-driven default: G = min(10, min_cell_n) local ngroups = min(10, `min_cell_n') // note if G was reduced below 10 if `min_cell_n' < 10 & `min_cell_n' >= 2 { local _any_warn = 1 local _nwarn = `_nwarn' + 1 local _warn`_nwarn' "Note: The number of groups was set to `ngroups' (the minimum level-2 cluster cell size)" local _warn`_nwarn'b " because some clusters contain fewer than 10 observations." } } else { // user-specified G: warn if G exceeds minimum cell size, but still attempt the test if `ngroups' > `min_cell_n' { local _any_warn = 1 local _nwarn = `_nwarn' + 1 local _warn`_nwarn' "Warning: groups(`ngroups') exceeds the minimum level-2 cluster cell size (`min_cell_n' obs)." local _warn`_nwarn'b " Some smaller clusters cannot be divided into `ngroups' groups; results should be interpreted with caution." } } if `ngroups' < 2 { di as error "The smallest cluster cell has only 1 observation. The test cannot be run." exit 459 } // header di as txt _n "Goodness-of-fit test after `n_levels'-level " /// "mixed-effects logistic regression with random effects" di as txt "Variable: {bf:`depvar'}" di as txt "" // conditional predicted probabilities tempname base_est quietly estimates store `base_est' tempvar xbhat phat quietly predict `xbhat' if `touse', xb forvalues t = 1/`n_reterms' { tempvar _re`t' local relist "`relist' `_re`t''" } quietly predict `relist', reffects quietly gen double `phat' = `xbhat' if `touse' forvalues t = 1/`n_reterms' { quietly replace `phat' = `phat' + `_re`t'' if `touse' } quietly replace `phat' = invlogit(`phat') if `touse' // sort, assign group labels, create indicators tempvar orig_order cell_n gi rank_w grp quietly gen long `orig_order' = _n quietly bysort `touse' `all_ivars': gen long `cell_n' = _N if `touse' quietly gen long `gi' = floor(`cell_n' / `ngroups') if `touse' sort `touse' `all_ivars' `phat' `orig_order' quietly by `touse' `all_ivars': gen long `rank_w' = _n if `touse' quietly gen byte `grp' = min(`ngroups', ceil(`rank_w' / `gi')) if `touse' sort `orig_order' local indvars "" forvalues g = 2/`ngroups' { tempvar ind`g' quietly gen byte `ind`g'' = (`grp' == `g') if `touse' local indvars `indvars' `ind`g'' } // augmented model re-fit — use preserve/restore to guarantee clean data state local _aug_ok = 1 local chi2stat = . local df_test = . local pvalue = . preserve if "`est_opts'" != "" { local scan_pos = 1 while strpos(substr("`re_spec'", `scan_pos', .), " || ") > 0 { local scan_pos = `scan_pos' + strpos(substr("`re_spec'", `scan_pos', .), " || ") + 3 } local last_seg_only = strtrim(substr("`re_spec'", `scan_pos', .)) local last_seg_has_comma = (strpos("`last_seg_only'", ",") > 0) if `last_seg_has_comma' { capture quietly melogit `depvar' `indepvars' `indvars' if `touse' `re_spec' `est_opts' } else { capture quietly melogit `depvar' `indepvars' `indvars' if `touse' `re_spec', `est_opts' } } else { capture quietly melogit `depvar' `indepvars' `indvars' if `touse' `re_spec' } if _rc != 0 | "`e(converged)'" == "0" { local _aug_ok = 0 } // joint Wald test if `_aug_ok' { forvalues g = 2/`ngroups' { local test_expr `"`test_expr' (`ind`g'' = 0)"' } capture quietly test `test_expr' if _rc != 0 { local _aug_ok = 0 } else { local chi2stat = r(chi2) local df_test = r(df) local pvalue = r(p) if missing(`chi2stat') local _aug_ok = 0 } } restore // always restore baseline estimates after preserve/restore quietly estimates restore `base_est' quietly estimates drop `base_est' // if augmented model failed or returned missing results, exit cleanly if !`_aug_ok' | missing(`chi2stat') { di as error "{bf:mlm_gof} could not produce a result." di as error "This typically means that {bf:groups(`ngroups')} is set too large for the available data." di as error "Omit {bf:groups()} to use the data-driven default, or specify a smaller value of {bf:groups()}." exit 430 } // display results local Gm1 = `ngroups' - 1 local _lbl "Number of observations" di as txt "`_lbl'" _dup(`=30-length("`_lbl'")') " " " = " as res %9.0f `N' forvalues _lv = `=`n_levels'-1'(-1)1 { local _lvar = word("`all_ivars'", `_lv') local _ldepth = `n_levels' - `_lv' + 1 tempvar _nclust quietly egen long `_nclust' = nvals(`_lvar') if `touse' quietly summarize `_nclust' if `touse', meanonly local _nc = r(mean) local _lbl "Number of level-`_ldepth' clusters" di as txt "`_lbl'" _dup(`=30-length("`_lbl'")') " " " = " as res %9.0f `_nc' } local _lbl "Number of groups" di as txt "`_lbl'" _dup(`=30-length("`_lbl'")') " " " = " as res %9.0f `ngroups' local _lbl "Wald chi2({bf:`Gm1'})" di as txt "`_lbl'" _dup(`=35-length("`_lbl'")') " " " = " as res %9.4f `chi2stat' local _lbl "Prob > chi2" di as txt "`_lbl'" _dup(`=30-length("`_lbl'")') " " " = " as res %9.4f `pvalue' di as txt "" if `pvalue' >= 0.05 { di as txt " {bf:Model fits the data adequately}" } else { di as txt " {bf:Model has a questionable fit}" } if `_any_warn' { di as txt "" forvalues _w = 1/`_nwarn' { di as txt "`_warn`_w''" di as txt "`_warn`_w'b'" } } // return results return scalar chi2 = `chi2stat' return scalar df = `df_test' return scalar p = `pvalue' return scalar N = `N' return scalar groups = `ngroups' return scalar levels = `n_levels' return scalar reterms = `n_reterms' end