*!version5.4 07Sep2023 /* ----------------------------------------------------------------------------- ** PROGRAM NAME: xtgeebcv ** VERSION: 5.4 ** DATE: SEPTEMBER 07, 2023 ** ----------------------------------------------------------------------------- ** CREATED BY: JOHN GALLIS, FAN LI, LIZ TURNER ** ----------------------------------------------------------------------------- ** PURPOSE: To facilitate the computation of finite-sample corrected standard errors ** in generalized estimating equations (GEE) models. ** ----------------------------------------------------------------------------- ** UPDATES: Sep 10, 2019 - For easier coding and postestimation, require user to dummy code his/her categorical variables; removing "categorical" option Sep 20, 2019 - std_err option changed to stderr - stderr option arguments changed to lowercase - default family changed to binomial - added esample() to ereturn post to allow for postestimation - subset the regression with touse, rather than the dataset Jan 24, 2020 - Bug fix: Added "level" option to _coef_table. Mar 02, 2020 - Major update: Program now allows for factor variables in the regression, and also allows for postestimation, for example, using the "margins" command. Also, does not require outcome variable to be specified. Mar 03, 2020 - Updated to remove missing values in data read into Mata and in the "beginend" matrix. Mar 04, 2020 - Finalized code and applied bug fixes; added warning if degrees of freedom for t distribution is less than or equal to 0. Aug 17, 2020 - This version only works on Stata 16.1 or later; use Stata Journal version for functionality with version 15. Aug 25, 2020 - Adding functionality for the offset term to be included in the calculations. Apr 05, 2021 - Modifying the trace in the MBN correction so it lines up with what is in Morel et al. (2003) rather than what is in Li and Redden (2015) - Only slightly changes the standard error. Dec 03, 2021 - Program wasn't allowing for intercept-only models. Now updated to allow for intercept-only model. May 23, 2023 - Updating program to allow for different ways of computing degrees of freedom Aug 03, 2023 - Updating program to allow for abbreviations in the family and link names, and abbreviations of options Sep 07, 2023 - Program updated to re-output scalars and macros necessary for proper functioning of postestimation commands such as margins ** ** ----------------------------------------------------------------------------- ** OPTIONS: SEE HELP FILE ** ----------------------------------------------------------------------------- */ program define xtgeebcv, eclass version 16.1 #delimit ; syntax varlist(fv min=1) [if] [in], CLuster(varname) [Family(string) Link(string) STDerr(string) STATistic(string) corr(string) DFMethod(string) DFSpec(integer 0) eform *] ; #delimit cr marksample touse /* \\\\\\\ SET STRING DEFAULTS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ if "`family'" == "" local family "binomial" else if inlist("`family'","b","bi","bin","bino","binom","binomi","binomia","binomial") local family "binomial" else if inlist("`family'","g","ga","gau","gaus","gauss","gaussi","gaussia","gaussian") local family "gaussian" else if inlist("`family'","nor","norm","norma","normal") local family "gaussian" else if inlist("`family'","p","po","poi","pois","poiss","poisso","poisson") local family "poisson" else { di as error "Invalid family specification for xtgeebcv; see help file for options." exit 198 } if "`link'" == "" & "`family'"=="binomial" local link "logit" else if "`link'" == "" & "`family'"=="poisson" local link "log" else if "`link'" == "" & "`family'"=="gaussian" local link "identity" else if inlist("`link'","i","id","ide","iden","ident","identi","identit","identity") local link "identity" else if inlist("`link'","log") local link "log" else if inlist("`link'","logi","logit") local link "logit" else { di as error "Invalid link specification for xtgeebcv; see help file for options." exit 198 } if "`family'" == "gaussian" & "`link'" == "identity" { } else if "`family'" == "poisson" & inlist("`link'","log","identity") { } else if "`family'" == "binomial" & inlist("`link'","log","logit","identity") { } else { di as error "Invalid family-link combination for xtgeebcv; see help file for options." exit 198 } if "`stderr'" == "" local stderr "kc" if "`stderr'" != "rb" & "`stderr'" != "df" & "`stderr'" != "kc" & "`stderr'" != "md" & "`stderr'" != "fg" & "`stderr'" != "mbn" { di as err "Invalid standard error specification; see help file for options." exit 198 } if "`statistic'" == "" local statistic "t" if "`statistic'" != "t" & "`statistic'" != "z" { di as err "Invalid statistic specification; see help file for options." exit 198 } if "`corr'" == "" local corr "exch" if "`dfmethod'" != "" & "`statistic'" == "z" { di as error "Note: dfmethod option not used when statistic = z" } if "`dfmethod'" == "" local dfmethod "cluster" if "`dfmethod'" != "specify" & "`dfmethod'" != "all" & "`dfmethod'" != "cluster" { di as error "Invalid df method selected; see help file for options." exit 198 } /* \\\\\ CREATING TEMPFILE SO THAT THE USER GETS THEIR FULL DATASET BACK AT THE END \\\\ \\\\\\\\ EVEN IF MISSING VALUES ARE DELETED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ /* \\\\\ DROP MISSING VALUES IN THE REGRESSION VARIABLES \\\\\\\\\\ */ qui count local N = r(N) * working with factor variables fvrevar `varlist', stub(_x_) local newvarlist = "`r(varlist)'" fvexpand `varlist' *for identifying position of 0's local oldvarlist = "`r(varlist)'" local colnames = "" local subtractlist = "" local i = 1 foreach wrd in `r(varlist)' { local tmpvar1: word `i' of `newvarlist' local tmpvar2: word `i' of `oldvarlist' if strpos("`wrd'", "b.") > 0 { local addlist "`tmpvar1'" local add2list "`tmpvar2'" local position: list posof `"`tmpvar2'"' in oldvarlist local position = `position' - 1 local subtractlist : list subtractlist | addlist local positionlist : list positionlist | position } else { local colnames `colnames' `wrd' } local i = `i' + 1 } *save varlist for dropping extraneous variables later local newvarlistb = "`newvarlist'" *newvarlist only contains non-zero parameters local newvarlist: list newvarlist - subtractlist * number of cluster-level covariates for degrees of freedom adjustment preserve collapse (sd) `newvarlist', by(`cluster') qui drop `cluster' collapse (sum) `newvarlist' foreach var in `newvarlist' { if `var' != 0 { qui drop `var' } } qui des, short local ncoef2 `r(k)' *add 1 for the intercept local ncoef2 = `ncoef2' + 1 restore /* \\\\\ INFORMATIONAL MESSAGES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ local famlink: list family | link di " " di as text "Note: Family is `family' and link is `link'" if substr("`corr'",1,3) == "exc" { di "Using exchangeable working correlation" } else if substr("`corr'",1,3) == "ind" { di "Using independent working correlation" } else { di as error "Invalid working correlation specification. Only exchangeable (exch) and independent (ind) are supported at this time" exit 198 } di as text "with scale parameter divided by K - p" /* \\\\\ CODE TO ADD _INTERCEPT TO VARLIST \\\\\\\\\\\\\\\\\\\\\\\\\\ */ tempname _intercept gen `_intercept' = 1 local add "`_intercept'" /* \\\\\ RUN STANDARD GEE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ xtset `cluster' xtgee `varlist' if `touse', family(`family') link(`link') corr(`corr') nmp `eform' `options' local level=r(level) /* \\\\\ OFFSET VARIABLE (IF USED) \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ if "`e(offset)'" == "" { tempname _offsetvar_ gen `_offsetvar_' = 0 local offsetvar = "`_offsetvar_'" } else if substr("`e(offset)'",1,3)=="ln(" { local offsetvar1 = subinstr("`e(offset)'","ln(","",1) local offsetvar = subinstr("`offsetvar1'",")","",3) tempname _offsetvar_ gen `_offsetvar_' = ln(`offsetvar') local offsetvar = "`_offsetvar_'" } else if substr("`e(offset)'",1,3)!="ln(" { local offsetvar = "`e(offset)'" capture drop _offsetvar_ tempname _offsetvar_ gen `_offsetvar_' = `offsetvar' local offsetvar = "`_offsetvar_'" } tempname b V noomit matrix `b' = e(b) matrix `V' = e(V) _ms_omit_info `b' local cols = colsof(`b') matrix `noomit' = J(1,`cols',1) - r(omit) mata: newV = select(st_matrix(st_local("V")),(st_matrix(st_local("noomit")))) mata: newV = select(newV, (st_matrix(st_local("noomit")))') mata: st_matrix(st_local("V"),newV) /* reduce matrix b */ mata: newB = select(st_matrix(st_local("b")),(st_matrix(st_local("noomit")))) mata: st_matrix(st_local("b"),newB) /* \\\\\\\\\\ WORKING CORRELATION MATRIX AND BETA (REGRESSION ESTIMATES) MATRIX \\\\ */ matrix R = e(R) matrix Beta = `b' * because of error(?) in Mata, have to pass the unchanged Beta matrix to Mata matrix Beta2 = `b' /* \\\\\\\\\\\ PUT _CONS AT BEGINNING OF BETA MATRIX \\\\\\\\\\\\\\ */ preserve drop _all qui svmat Beta qui codebook loc lastvar: word `c(k)' of `r(cons)' order `lastvar' qui mkmat _all, matrix(Beta) restore /* \\\\\\ WORKING CORRELATION AND THE SCALE PARAMETER (PHI) \\\\\\\\ */ local wcorr = R[2,1] local phi = e(phi) /* \\\\\\\ SPECIFY OUTCOME \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ local outcome: word 1 of `newvarlist' /* \\\\\\ ADD _INTERCEPT TO THE BEGINNING OF NEWVARLIST \\\\\\\\\\\\\ */ qui local newvarlist2="`newvarlist'" qui local newvarlist : list add | newvarlist /* \\\\\\\ ADD OFFSET TO THE END OF NEWVARLIST \\\\\\\\\\\\\\\\\\\\\\\ */ qui local newvarlist : list newvarlist | offsetvar /* \\\\\\ SORT THE CLUSTERS THEN CREATE A NEW CLUSTER ID VARIABLE WHICH IS \\\\\\\\\\\\ \\\\\\\\\ SEQUENTIAL \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ sort `cluster' tempname _newclustid egen `_newclustid' = group(`cluster') preserve /* \\\\\\\\\\ OBTAIN NUMBER IN EACH CLUSTER \\\\\\\\\\\\\\\\\\\\ */ * Update 3/3/2020 - remove missing values local i = 1 foreach x in `newvarlist2' { local tmpvar1: word `i' of `newvarlist2' capture confirm numeric variable `tmpvar1' if !_rc { quietly drop if `tmpvar1' == . } else { quietly drop if `tmpvar1' == "" } local i = `i'+1 } qui tab `_newclustid', matcell(clust) /* \\\\\\\\\\ OBTAINING LOCATION OF FIRST AND LAST OBSERVATION \\\\\\\\\\\\\\\\\\ \\\\\\\\\\\\\ WITHIN EACH CLUSTER \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ capture { drop freq drop cumfreq drop _first drop _last drop tag_cluster } qui bys `_newclustid': gen freq = _N qui bys `_newclustid': gen cumfreq = _N if _n == 1 qui replace cumfreq = sum(cumfreq) qui gen _first=cumfreq-(freq-1) qui gen _last=cumfreq qui egen tag_cluster = tag(`_newclustid') qui keep if tag_cluster == 1 qui mkmat _first _last, matrix(beginend) restore /* \\\\\\\\\\\\\\\\\\\\\\\\\\\ RUN MATA PROGRAM \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ */ mata: gee("`newvarlist'","beginend","clust","Beta","Beta2",`wcorr',`phi',"`famlink'") /* \\\\\\\ ADD INTERCEPT TO END OF VARLIST \\\\\\\\\\\\ */ qui local newvarlist : list newvarlist - add qui local add = "_cons" qui local newvarlist : list newvarlist | add /* \\\\\\\\\\\\\\\\\\ OUTPUTTING THE REQUESTED VARIANCE TO E(V) \\\\\\\\\\\\\\\ \\\\\\\\\\\\\\\ AND UPDATING REGRESSION TABLE WITH CORRECTED STANDARD ERRORS \\*/ /* \\\\\\\\\\\ Robust, df, kc, md, fg, AND mbn CORRECTIONS \\\\\\\ */ local bcvs "rb df kc md fg mbn" local names ""Robust" "Degrees-of-Freedom" "Kauermann-Carroll" "Mancl-DeRouen" "Fay-Graubard" "Morel-Bokossa-Neerchal"" local n: word count `bcvs' forvalues i=1/`n' { local bcv: word `i' of `bcvs' local name: word `i' of `names' if "`stderr'" == "`bcv'" { /* \\\\\\\ DEGREES OF FREEDOM FOR THE T DISTRIBUTION \\\\\\\\ */ if "`dfmethod'" == "all" { local dof=nclust[1,1]-ncoef[1,1] } else if "`dfmethod'" == "cluster" { local dof=nclust[1,1]-`ncoef2' } else if "`dfmethod'" == "specify" { local dof=nclust[1,1]-`dfspec' } local nclust = nclust[1,1] local ncoef = ncoef[1,1] /* \\\\\\ UPDATE STANDARD ERRORS WITH BIAS CORRECTION \\\\\\ */ *ADD ONE ROW TO POSITIONLIST FOR THE LAST ROW IN THE MATRIX local last = rowsof(e(V))+1 local positionlist `"`positionlist' `last'"' local positionsize: list sizeof positionlist * check for a position-list length of 1; if 1, then there are no factor variables in the model if "`positionsize'" == "1" { matrix var`bcv'final2 = var`bcv' } else { /* ADDING ZEROS BACK TO ROWS IN VARIANCE MATRIX */ local i=1 foreach wrd in `positionlist' { local j=`i'-1 local t: word `i' of `positionlist' local t0 = `t' - `i' if `i' == 1 { if `t0' == 0 { local cols=colsof(var`bcv') matrix var`bcv'0s = J(1,`cols',0) matrix var`bcv'final=var`bcv'0s } else { matrix var`bcv'`t0' = var`bcv'[1..`t0',1...] local cols=colsof(var`bcv'`t0') matrix var`bcv'0s = J(1,`cols',0) matrix var`bcv'final=(var`bcv'`t0'\var`bcv'0s) } } else if `i' < `positionsize' { local tminus1: word `j' of `positionlist' *If there are multiple sequential numbers in the position list *then we must add the same number of zero columns in a row if `t' == `tminus1'+1 { local cols=colsof(var`bcv'final) matrix var`bcv'0s = J(1,`cols',0) matrix var`bcv'final=(var`bcv'final\var`bcv'0s) } else { local j2=`j'-1 local t0minus1 = `tminus1' - `j2' matrix var`bcv'`t0' = var`bcv'[`t0minus1'..`t0',1...] local cols=colsof(var`bcv'`t0') matrix var`bcv'0s = J(1,`cols',0) matrix var`bcv'final=(var`bcv'final\var`bcv'`t0'\var`bcv'0s) } } else if `i' == `positionsize' { local tminus1: word `j' of `positionlist' local j2=`j'-1 local t0minus1 = `tminus1' - `j2' matrix var`bcv'`t0' = var`bcv'[`t0minus1'..`t0',1...] local cols=colsof(var`bcv'`t0') matrix var`bcv'0s = J(1,`cols',0) matrix var`bcv'final=(var`bcv'final\var`bcv'`t0') } local ++i } /* ADDING ZEROS BACK TO COLUMNS IN VARIANCE MATRIX */ local i=1 foreach wrd in `positionlist' { local j=`i'-1 local t: word `i' of `positionlist' local t0 = `t' - `i' if `i' == 1 { if `t0' == 0 { local rows=rowsof(var`bcv'final) matrix var`bcv'0s = J(`rows',1,0) matrix var`bcv'final2=var`bcv'0s } else { matrix var`bcv'`t0' = var`bcv'final[1...,1..`t0'] local rows=rowsof(var`bcv'`t0') matrix var`bcv'0s = J(`rows',1,0) matrix var`bcv'final2=(var`bcv'`t0',var`bcv'0s) } } else if `i' < `positionsize' { local tminus1: word `j' of `positionlist' if `t' == `tminus1'+1 { local rows=rowsof(var`bcv'final) matrix var`bcv'0s = J(`rows',1,0) matrix var`bcv'final2=(var`bcv'final2,var`bcv'0s) } else { local j2=`j'-1 local t0minus1 = `tminus1' - `j2' matrix var`bcv'`t0' = var`bcv'final[1...,`t0minus1'..`t0'] local rows=rowsof(var`bcv'`t0') matrix var`bcv'0s = J(`rows',1,0) matrix var`bcv'final2=(var`bcv'final2,var`bcv'`t0',var`bcv'0s) } } else if `i' == `positionsize' { local tminus1: word `j' of `positionlist' local j2=`j'-1 local t0minus1 = `tminus1' - `j2' matrix var`bcv'`t0' = var`bcv'final[1...,`t0minus1'..`t0'] local rows=rowsof(var`bcv'`t0') matrix var`bcv'0s = J(`rows',1,0) matrix var`bcv'final2=(var`bcv'final2,var`bcv'`t0') } local ++i } } matrix bb=e(b) /* \\\\\\ UPDATE STANDARD ERRORS WITH BIAS CORRECTION \\\\\\ */ ereturn repost b=bb V = var`bcv'final2, resize /* \\\\\\\ OUTPUT P-VALUES USING T-STATISTIC \\\\\\\\\\\\\\\\\\\\\\\\\ */ if "`statistic'" == "t" { matrix b=e(b) matrix V=e(V) /* re-adding estimation results after post, so that margins and other postestimation commands work correctly */ foreach xyz in cmdline depvar cmd marginsnotok predict estat_cmd link corr family ivar model scale { local `xyz'1 = e(`xyz') } foreach xyz in df_m chi2 p df_pear N N_g g_max g_min g_avg tol dif phi deviance dispers chi2_dev chi2_dis rc rank { local `xyz'2 = e(`xyz') } ereturn post b V, dof(`dof') esample(`touse') buildfv ereturn matrix varNaive = varNaive foreach xyz in cmdline depvar cmd marginsnotok predict estat_cmd link corr family ivar model scale { ereturn local `xyz' "``xyz'1'" } foreach xyz in df_m chi2 p df_pear N N_g g_max g_min g_avg tol dif phi deviance dispers chi2_dev chi2_dis rc rank { ereturn scalar `xyz' = ``xyz'2' } if "`bcv'" != "rb" ereturn matrix varrb = varrb if "`bcv'" != "df" ereturn matrix vardf = vardf if "`bcv'" != "kc" ereturn matrix varkc = varkc if "`bcv'" != "md" ereturn matrix varmd = varmd if "`bcv'" != "fg" ereturn matrix varfg = varfg if "`bcv'" != "mbn" ereturn matrix varmbn = varmbn if "`bcv'" != "rb" { di as text " " di as text "`name' bias-corrected standard errors" if "`dfmethod'" == "all" { di as text "t-statistic with K - p (`nclust' - `ncoef' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of covariates plus the intercept" } else if "`dfmethod'" == "cluster" { di as text "t-statistic with K - p (`nclust' - `ncoef2' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of cluster-level covariates plus the intercept" } else if "`dfmethod'" == "specify" { di as text "t-statistic with K - p (`nclust' - `dfspec' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is specified by the user" } else { di as text "t-statistic with K - p (`nclust' - `ncoef2' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of cluster-level covariates plus the intercept" } if `dof' <= 0 { di as error "Warning: Degrees of freedom for t distribution ≤ 0" } } else { di as text " " di as text "Robust standard errors not multiplied by √K/(K-1)" if "`dfmethod'" == "all" { di as text "t-statistic with K - p (`nclust' - `ncoef' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of covariates plus the intercept" } else if "`dfmethod'" == "cluster" { di as text "t-statistic with K - p (`nclust' - `ncoef2' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of cluster-level covariates plus the intercept" } else if "`dfmethod'" == "specify" { di as text "t-statistic with K - p (`nclust' - `dfspec' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is specified by the user" } else { di as text "t-statistic with K - p (`nclust' - `ncoef2' = `dof') degrees of freedom" di as text "where K is the number of clusters and p is the number of cluster-level covariates plus the intercept" } if `dof' <= 0 { di as error "Warning: Degrees of freedom for t distribution ≤ 0" } } _coef_table, `eform' level(`level') } /* \\\\\\\ OUTPUT P-VALUES USING Z-STATISTIC \\\\\\\\\\\\\\\\\\\\\\\\\ */ else if "`statistic'"=="z" { matrix b=e(b) matrix V=e(V) foreach xyz in cmdline depvar cmd marginsnotok predict estat_cmd link corr family ivar model scale { local `xyz'1 = e(`xyz') } foreach xyz in df_m chi2 p df_pear N N_g g_max g_min g_avg tol dif phi deviance dispers chi2_dev chi2_dis rc rank { local `xyz'2 = e(`xyz') } ereturn post b V, esample(`touse') buildfv ereturn matrix varNaive = varNaive foreach xyz in cmdline depvar cmd marginsnotok predict estat_cmd link corr family ivar model scale { ereturn local `xyz' "``xyz'1'" } foreach xyz in df_m chi2 p df_pear N N_g g_max g_min g_avg tol dif phi deviance dispers chi2_dev chi2_dis rc rank { ereturn scalar `xyz' = ``xyz'2' } if "`bcv'" != "rb" ereturn matrix varrb = varrb if "`bcv'" != "df" ereturn matrix vardf = vardf if "`bcv'" != "kc" ereturn matrix varkc = varkc if "`bcv'" != "md" ereturn matrix varmd = varmd if "`bcv'" != "fg" ereturn matrix varfg = varfg if "`bcv'" != "mbn" ereturn matrix varmbn = varmbn if "`bcv'" != "rb" { di as text " " di as text "`name' bias-corrected standard errors" } else { di as text " " di as text "Robust standard errors not multiplied by √K/(K-1)" } _coef_table, `eform' level(`level') } } } *dropping extraneous variables to output dataset foreach var in `newvarlistb' { if substr("`var'",1,3) == "_x_" { drop `var' } } end /* ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||| MATA PROGRAM |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ mata: matrix gee(string scalar newvarlist, string scalar beginend, string scalar cluster, string scalar Beta, string scalar Beta2, scalar wcorr, scalar phi, string scalar famlink) { // READING DATA WITHOUT MISSING VALUES INTO MATA X2=st_data(.,tokens(newvarlist),0) // SUBSETTING TO DESIGN MATRIX WITHOUT OUTCOME (WHICH IS ROW 2) AND WITHOUT OFFSET VARIABLE (WHICH IS THE LAST ROW) colnum = cols(X2)-1 if (colnum >= 3) { X=X2[.,(1,3..colnum)] } // for intercept-only models else { X=X2[.,(1)] } // SUBSETTING TO OUTCOME COLUMN VECTOR y=X2[.,2] // SUBSETTING TO OFFSET VARIABLE COLUMN VECTOR offset=X2[.,cols(X2)] // CLUSTER Cnum=st_matrix(cluster) // BEGINNING AND END POINT OF EACH CLUSTER bend = st_matrix(beginend) // BETA COEFFICIENT MATRIX B = st_matrix(Beta)' // NUMBER OF COLUMNS IN X p=cols(X) // NUMBER OF CLUSTERS K=rows(Cnum) //https://www.stata.com/statalist/archive/2012-01/msg01032.html a=(0) b=(1..p) U=a#b' Ustar=J(p,p,0) UUtran=J(p,p,0) // SCORE FUNCTION START /////////////////////////////////////////////////////////////// for (i=1; i<=rows(Cnum); i++) { // X_c = cluster-specific design matrix X_c = X[bend[i,1]..bend[i,2],] // y_c = cluster-specific column vector of the outcome variable y_c = y[bend[i,1]..bend[i,2],] U_c=a#b' Ustar_c=J(p,p,0) // o_c = cluster-specific column vector of the offset variable o_c = offset[bend[i,1]..bend[i,2],] // mu_c = cluster-specific marginal mean of y_c // D = partial derivative of mu_c with respect to Beta; corresponds to D_i in GEE if (famlink=="binomial logit") { mu_c =(1:/(1 :+ exp(-1*(X_c*B :+ o_c)))) D=X_c:*(mu_c:*(1:-mu_c)) } else if (famlink == "binomial log" | famlink == "poisson log") { mu_c = exp(X_c*B :+ o_c) D=X_c:*mu_c } else if (famlink == "binomial identity" | famlink == "poisson identity" | famlink == "gaussian identity" | famlink == "normal identity") { mu_c = X_c*B :+ o_c D=X_c } // r = residuals y_c - mu_c r=y_c-mu_c M=J(Cnum[i],Cnum[i],wcorr/((1-wcorr)*(1-wcorr+Cnum[i]:*wcorr))) N=diag(J(1,Cnum[i],1/(1-wcorr))) // INVR = inverse of workin correlation matrix R INVR=N-M // INVV = inverse of the working variance matrix V // O corresponds to the inverse of A_c^1/2 // phi is the scale parameter // LOGIC FOR A SINGLETON CLUSTER if (rows(X_c) == 1) { if (famlink=="binomial logit" | famlink == "binomial log" | famlink == "binomial identity") { INVV=1/(mu_c*(1-mu_c)*phi) } else if (famlink == "poisson log" | famlink == "poisson identity") { INVV=1/(mu_c*phi) } else if (famlink == "gaussian identity" | famlink == "gaussian identity") { INVV=1/phi } } else { if (famlink=="binomial logit" | famlink == "binomial log" | famlink == "binomial identity") { O=diag(J(1,Cnum[i],1:/sqrt(mu_c:*(1:-mu_c)))) INVV = O*INVR*O/phi } else if (famlink == "poisson log" | famlink == "poisson identity") { O=diag(J(1,Cnum[i],1:/sqrt(mu_c))) INVV = O*INVR*O/phi } else if (famlink == "gaussian identity" | famlink == "gaussian identity") { INVV = INVR/phi } } // U_c = corresponds to D_i'V_i^-1(y_c - mu_c) in the estimating equations U_c = D'*INVV*r // UUtran = DVrrVD, the "meat" of the sandwich UUtran_c=U_c*U_c' // Ustar_c = DVD, the "bread" of the sandwich Ustar_c=D'*INVV*D U=U+U_c UUtran=UUtran+UUtran_c Ustar=Ustar+Ustar_c } // SCORE FUNCTION END //////////////////////////////////////////////////////////////////////////// //computations for naive, model-based estimator AHALF=cholesky(Ustar)' PINV=pinv(AHALF) AINV=PINV*PINV' // NAIVE STANDARD ERROR ////////////////////// naive=AINV ////////////////////////////////////////////// // ROBUST STANDARD ERROR ///////////////////// robust=naive*UUtran*naive' ////////////////////////////////////////////// // df BIAS-CORRECTED STANDARD ERROR ////////// df = (K/(K - p)):*robust ////////////////////////////////////////////// dbar = 1/K*U // mbn BIAS-CORRECTED STANDARD ERROR /////// nstar = rows(X) correction=(nstar-1)/(nstar-p)*(K/(K-1))*(UUtran-K*dbar*dbar') //April 5, 2021 - Trace updated to match Morel et al. (2003) (see equation 5) phi2 = rowmax((1,trace((nstar-1)/(nstar-p)*(K/(K-1))*naive*correction)/p)) delta=rowmin((0.5,p/(K - p))) //Sample-size correction for mbn //correction=(nstar-1)/(nstar-p)*(K/(K-1)) mbn = naive*correction*naive' + delta*phi2*naive ////////////////////////////////////////////// symeigensystem(naive,evec,eval) sqreval=sqrt(eval) sqe=evec*diag(sqreval) UUtran=UUbc=UUbc2=UUbc3=Ustar=J(p,p,0) // CORRECTIONS FOR kc, md, AND fg BIAS-CORRECTED STANDARD ERRORS, LOOP START /////////////////////////////// for (i=1; i<=rows(Cnum); i++) { X_c = X[bend[i,1]..bend[i,2],] y_c = y[bend[i,1]..bend[i,2],] U_c=a#b' // o_c = cluster-specific column vector of the offset variable o_c = offset[bend[i,1]..bend[i,2],] if (famlink=="binomial logit") { mu_c =(1:/(1 :+ exp(-1*(X_c*B :+ o_c)))) D=X_c:*(mu_c:*(1:-mu_c)) } else if (famlink == "binomial log" | famlink == "poisson log") { mu_c = exp(X_c*B :+ o_c) D=X_c:*mu_c } else if (famlink == "binomial identity" | famlink == "poisson identity" | famlink == "gaussian identity" | famlink == "normal identity") { mu_c = X_c*B :+ o_c D=X_c } r=y_c-mu_c M=J(Cnum[i],Cnum[i],wcorr/((1-wcorr)*(1-wcorr+Cnum[i]:*wcorr))) N=diag(J(1,Cnum[i],1/(1-wcorr))) INVR=N-M if (famlink=="binomial logit" | famlink == "binomial log" | famlink == "binomial identity") { O=diag(J(1,Cnum[i],1:/sqrt(mu_c:*(1:-mu_c)))) INVV = O*INVR*O/phi } else if (famlink == "poisson log" | famlink == "poisson identity") { O=diag(J(1,Cnum[i],1:/sqrt(mu_c))) INVV = O*INVR*O/phi } else if (famlink == "gaussian identity" | famlink == "normal identity") { INVV = INVR/phi } U_i = D'*INVV*r //commands for generalized inverse - beta // USING WOODBURY FROM PREISSER ET AL. (2008) TO PERFORM A FAST INVERSE ////////////////////////// ai1=INVV mm1=D*sqe ai1A=ai1*r ai1m1=ai1*mm1 //INVBIG for (j=1; j<=p; j++) { b=ai1m1[,j] bt=b' btm=bt*mm1 btmi=btm[,j] gam=1-btmi bg=b/gam ai1A=ai1A+bg*(bt*r) if (j= 2) { // reversing naive naive2 = naive[(2::cols(naive)),1]' naive3 = naive[1,1] naive4 = (naive2,naive3)' naive5 = naive[2::rows(naive),2::cols(naive)] varNaive=(naive5 \ naive2),naive4 // reversing robust robust2 = robust[(2::cols(robust)),1]' robust3 = robust[1,1] robust4 = (robust2,robust3)' robust5 = robust[2::rows(robust),2::cols(robust)] varrb = (robust5 \ robust2),robust4 // reversing df df2 = df[(2::cols(df)),1]' df3 = df[1,1] df4 = (df2,df3)' df5 = df[2::rows(df),2::cols(df)] vardf = (df5 \ df2),df4 // reversing kc kc2 = kc[(2::cols(kc)),1]' kc3 = kc[1,1] kc4 = (kc2,kc3)' kc5 = kc[2::rows(kc),2::cols(kc)] varkc = (kc5 \ kc2),kc4 // reversing md md2 = md[(2::cols(md)),1]' md3 = md[1,1] md4 = (md2,md3)' md5 = md[2::rows(md),2::cols(md)] varmd = (md5 \ md2),md4 // reversing fg fg2 = fg[(2::cols(fg)),1]' fg3 = fg[1,1] fg4 = (fg2,fg3)' fg5 = fg[2::rows(fg),2::cols(fg)] varfg = (fg5 \ fg2),fg4 // reversing mbn mbn2 = mbn[(2::cols(mbn)),1]' mbn3 = mbn[1,1] mbn4 = (mbn2,mbn3)' mbn5 = mbn[2::rows(mbn),2::cols(mbn)] varmbn = (mbn5 \ mbn2),mbn4 } // for intercept-only models else { varNaive=naive varrb = robust vardf = df varkc = kc varmd = md varfg = fg varmbn = mbn } st_matrix("varNaive",varNaive) st_matrix("varrb",varrb) st_matrix("vardf",vardf) st_matrix("varkc",varkc) st_matrix("varmd",varmd) st_matrix("varfg",varfg) st_matrix("varmbn",varmbn) //////////////////////////////////////////////////////////////////////////////// // OUTPUT T-TEST INFO BACK TO STATA ///////////////////////////////////////////////// B2 = st_matrix(Beta2)' ncoef=length(B2) /* to compute degrees of freedom for the t-test */ st_matrix("ncoef",ncoef) st_matrix("nclust",K) //////////////////////////////////////////////////////////////////////////////// } end