*! version 1.1.6 PR 04jan2007
/*
History

1.1.6 04jan2007 e(micombine) returns micombine. e(cmd2) returns `cmd'.
1.1.5 01nov2006 Changed default names for impid and obsid to _mj and _mi resp.
1.1.4 26jul2006 Awkward bug in determination of nobs fixed (reported by Christina Gibson-Davis)
1.1.3 23jun2006 svy option: Stata 8/9 functionality sorted out.
		infgain (hidden option) added.
1.1.2 23may2006 svy option added, with svy options as argument.
		Minor bug affecting stpm fixed.
1.1.1 28nov2005 eform and eform() option styles now allowed.
		nowarning option to suppress warning message about supported regression cmds.
1.1.0 30sep2005 Generalization of supported commands (cmdchk.ado also modified).
		Version <=7 no longer supported.
1.0.9 18apr2005 Default rowid variable becomes _i.
		If not found, take from char _dta[MI_obsid] then char _dta[mi_id].
		Default impid variable is _j. If not found, take from _dta[MI_impid].
		These changes are for future compatibility with MItools.
1.0.8 04mar2005 Fixed problem with eform on redisplay
1.0.7 28jan2005 Fixed problem with null model estimation
		Fixed bug with noconstant option
1.0.6 25jan2005 e(sample) now correct for all imputations, via ereturn post.
		Add d.f. quantities from Barnard & Rubin (1999) Biometrika 86:948-955 eq (3)-(5).
		Minor additions to ereturn quantities for compatibility with misw.
		Allow null model and model only with constant for benefit of misw.
1.0.5 16nov2004 Update calculations of quantities stored for Li et al (1991) F test.
1.0.4 13oct2004 Change e(cmd) to `cmd', make e(cmd2)="micombine" (for use after mfx).
		Implementation of mean log likelihood and chisquare statistic saved
		(note undocumented -noclear- but v. useful option to ereturn post/estimates post).
*/
program define micombine, eclass

if _caller()<=7 {
	di as error "version 7 and earlier not supported"
	exit 9
}

if replay() {
	if `"`e(micombine)'"'!="micombine" {
		error 301
	}
	syntax[, EForm EForm2(string)]
	if "`eform2'"!="" {
		if "`eform'"!="" di as err "[eform ignored]"
		local eform eform(`eform2')
	}
	else if "`eform'"!="" local eform eform("exp(b)")
	di as text _n "Multiple imputation parameter estimates (" as res e(m) as text " imputations)"
	capture ereturn  display, `eform'
	local rc=_rc
	if `rc'>0 {
		* Null model, or model with cc() only
		if `"`e(cmd)'"'!="stcox" {
			`e(cmd)' `0'
		}
		else di as text "[Null model - no estimates]"
	}
	else ereturn  display, `eform'
	di as result e(N) as text " observations."
	exit
}

gettoken cmd 0 : 0
if "`cmd'"=="stpm" {
	local dist 7
	local cmdnotknown 0
}
else {
	cmdchk `cmd'
	local cmdnotknown `s(bad)'
	/*
		dist=0 (normal), 1 (binomial), 2 (poisson), 3 (cox), 4 (glm),
		5 (xtgee), 6(ereg/weibull).
	*/
	local dist `s(dist)'
}
syntax [anything] [if] [in] [aw fw pw iw] , [ IMPid(string) BR CC(varlist) noCONStant ///
 DEAD(varname) DETail EForm EForm2(string) GENxb(string) INFgain LRR noWARning OBSid(string) ///
 SVYalone svy(string) * ]
 
if `cmdnotknown' & "`warning'"!="nowarning" {
	di as err _n "Warning: " as inp "`cmd'" as err " is not a certified regression command for micombine."
	di as err "micombine will continue mechanically, but correct results are not guaranteed."
	di as err "You must take responsibility that Rubin's rules are appropriate here." _n
}

if "`eform2'"!="" {
	if "`eform'"!="" di as err "[eform ignored]"
	local eform eform(`eform2')
}
else if "`eform'"!="" local eform eform("exp(b)")

* Check impid
if "`impid'"=="" local impid _mj
cap confirm var `impid'
if _rc {
	di as err "imputation identifier `impid' not found"
	exit 601
}

* Check obsid
if "`obsid'"=="" {
	local I: char _dta[mi_id]
	if "`I'"=="" local I _mi
}
else local I `obsid'
cap confirm var `I'
if _rc {
	di as err "observation identifier `I' not found"
	exit 601
}

if "`detail'"!="" {
	local detail noisily
}
else local detail

if "`svyalone'"!="" {
	if _caller()>=9 local svy svy:
	else local svy svy
}
else if `"`svy'"'!="" {
	if _caller()>=9 local svy svy, `svy':
	else {
		local svy svy
		local options `"`options' `svy'"'
	}
	else local svy svy, `svy':
}
*Change here 11/15/05, commenting out the line below !! PR DELETED XIAO CHEN'S EDIT, NOV 05
frac_cox "`dead'" `dist'

if "`constant'"=="noconstant" {
	if "`cmd'"=="fit" | "`cmd'"=="stcox" | "`cmd'"=="cox" {
		di as error "noconstant invalid with `cmd'"
		exit 198
	}
}

*Change here 11/15/05, `dist' could be null....  !! PR DELETED XIAO CHEN'S EDIT, NOV 05
*if "`dist'" =="7" {	/* stcox, streg, stpm */
if `dist'==7 {
	local y
	local yname _t
	local xvars `anything'
}
else {
	 gettoken y xvars : anything
	 gettoken xvars left: xvars, parse("(")
	 local yname `y'
}

tempvar touse
quietly {
/*
	Getting n for each imputation is not straightforward, since `anything'
	may give problems with marksample. Try to work around this issue.
*/
	marksample touse, novarlist
	local revise_touse 0
	capture markout `touse' `y' `xvars' `left' `dead' `cc'
	if _rc {
		markout `touse' `y' `xvars' `dead' `cc'	// I assume this can't fail!
		local revise_touse 1
	}
	if "`dead'"!="" {
		local dead "dead(`dead')"
	}

* Deal with weights.
	frac_wgt `"`exp'"' `touse' `"`weight'"'
	local wgt `r(wgt)'

	tempvar J
	* Important (compatibility with mitools): ignore rows for which impid=0
	egen int `J'=group(`impid') if `touse'==1 & `impid'>0 & !missing(`impid')
	sum `J', meanonly
	local m=r(max)
	if `m'<2 {
		di as error "there must be at least 2 imputations"
		exit 198
	}

	local nxvar: word count `xvars'
/*
	if `nxvar'<1 {
		di as err "there must be at least one covariate"
		exit 198
	}
*/
	local ncc: word count `cc'		/* could legitimately be zero */
	local nvar=`nxvar'+`ncc'		/* number of covariates in model */

	count if `touse'==1 & `J'==1
	local nobs=r(N)

* Null Cox model (or model with only ccvars): fit on final imputation only, and quit
	if "`xvars'"=="" & ("`cmd'"=="cox" | "`cmd'"=="stcox") {
		if "`cmd'"=="stcox" & "`cc'"=="" {
			local options `options' estimate
		}
		`detail' `svy'`cmd' `y' `cc' if `touse'==1 & `J'==`m' `wgt', `options' `dead' `constant'
		noi `cmd'
		di as result `nobs' as text " observations."
		ereturn  scalar m=`m'
		ereturn  local impid `impid'
		ereturn  local cmd `cmd'
		ereturn  local cmd2 micombine
		exit
	}

* Fit model on original data - get Wald chisquare
	if "`infgain'"!="" {
		tempname chi2_1 chi2_0 df_1 df_0 nold
		`detail' `svy'`cmd' `y' `xvars' `cc' `left' if `touse'==1 & `impid'==0 `wgt', `options' `dead' `constant'
		scalar `nold'=e(N)
		test `xvars' `cc' `left'
		scalar `df_0'=r(df)
		if missing(r(chi2)) scalar `chi2_0'=r(F)*`df_0'
		else scalar `chi2_0'=r(chi2)
	}

* Compute model over m imputations
	tempname W Q B T QQ
	if "`genxb'"!="" {
		tempvar xb xbtmp
		gen `xb'=.
	}
	* Estimate mean LR chisquare statistic (where possible)
	tempname chi2 ell ell0 nucom
	scalar `chi2'=0
	scalar `ell'=0
	scalar `ell0'=0
	forvalues i=1/`m' {
		tempname Q`i'
		`detail' `svy'`cmd' `y' `xvars' `cc' `left' if `touse'==1 & `J'==`i' `wgt', ///
		 `options' `dead' `constant'
		if `revise_touse' {
			* Deal with `left' that gave problems in markout.
			* Cautious approach is to replace `touse'; may not always be necessary,
			* but it is safe and costs little.
			replace `touse'=e(sample) if `J'==`i'
			if `i'==1 local nobs=e(N)
		}
		if e(N)!=`nobs' {
			noi di as txt "[Note: sample size in imputation `i' is " e(N) ", different from " `nobs' " in imp. 1]"
		}
		scalar `nucom'=e(df_r)	// complete-data residual degrees of freedom
		if `nucom'==. {
			scalar `nucom'=100000
		}
		scalar `ell'=`ell'+e(ll)
		scalar `ell0'=`ell0'+e(ll_0)
		scalar `chi2'=`chi2'-2*(e(ll_0)-e(ll))
		if "`genxb'"!="" {
			predict `xbtmp' if `touse'==1 & `J'==`i', xb
			replace `xb'=`xbtmp' if  `touse'==1 & `J'==`i'
			drop `xbtmp'
		}
		matrix `Q`i''=e(b)
		if `i'==1 {
			matrix `Q'=e(b)
			matrix `W'=e(V)
		}
		else {
			matrix `Q'=`Q'+e(b)
			matrix `W'=`W'+e(V)
		}

	}
	if "`genxb'"!="" {
		sort `touse' `I' `J'
		by `touse' `I': gen `genxb'=sum(`xb')/`m' if `touse'==1
		by `touse' `I': replace `genxb'=`genxb'[_N] if _n<_N
		lab var `genxb' "Mean Linear Predictor (`m' imputations)"
	}
	matrix `Q'=`Q'/`m'		/* MI param estimates */
	matrix `W'=`W'/`m'
	scalar `chi2'=`chi2'/`m'
	scalar `ell'=`ell'/`m'
	scalar `ell0'=`ell0'/`m'
	local k=colsof(`Q')
	matrix `B'=J(`k',`k',0)
	forvalues i=1/`m' {
		matrix `QQ'=`Q`i''-`Q'
		if `i'==1 {
			matrix `B'=`QQ''*`QQ'
		}
		else matrix `B'=`B'+`QQ''*`QQ'
	}
	matrix `B'=`B'/(`m'-1)
	matrix `T'=`W'+(1+1/`m')*`B'	/* estimated VCE matrix */
	/*
		Relative increase in variance due to missing information (r) for
		each variable, and df and lambda, the fraction of missing information.
		All measures are unstable for low m. See Schafer (1997) p. 110.

		Note that BIF = sqrt(T/W) = sqrt(1 + (B/W)*(1+1/m)) = sqrt(1+r)
		is the between-imputation imprecision factor, i.e. the ratio
		of the SE derived from T to the SE derived from W,
		ignoring between-imputation variation in parameter estimates.
	*/
	tempname r t lambda nu BIF
	matrix `r'=J(1,`k',0)
	matrix `lambda'=J(1,`k',0)
	matrix `nu'=J(1,`k',0)
	matrix `BIF'=J(1,`k',0)
	scalar `t'=`m'-1
	* Next few lines assign quantities for tests of individual (1 df) components of Q (=beta)
	forvalues j=1/`k' {
		matrix `r'[1,`j']=(1+1/`m')*`B'[`j',`j']/`W'[`j',`j']
		matrix `nu'[1,`j']=cond(`t'>4, 4+(`t'-4)*(1+(1-2/`t')/`r'[1,`j'])^2, `t'*(1+1/`r'[1,`j'])^2)
		matrix `lambda'[1,`j']=(`r'[1,`j']+2/(`nu'[1,`j']+3))/(`r'[1,`j']+1)
		matrix `BIF'[1,`j']=sqrt(1+`r'[1,`j'])	/* = sqrt(`T'[`j',`j']/`W'[`j',`j']) */
	}
	* Next few lines assign quantities for d.f. from Barnard & Rubin 1999 B'ka 86(4): 948-955.
	tempname nutilde num nuobs gamma
	matrix `nutilde'=J(1,`k',0)
	matrix `num'=J(1,`k',0)
	matrix `nuobs'=J(1,`k',0)
	matrix `gamma'=J(1,`k',0)
	forvalues j=1/`k' {
		matrix `gamma'[1,`j']=(1+1/`m')*`B'[`j',`j']/`T'[`j',`j']
		matrix `nuobs'[1,`j']=((`nucom'+1)/(`nucom'+3))*`nucom'*(1-`gamma'[1,`j'])
		matrix `num'[1,`j']=(`m'-1)*`gamma'[1,`j']^-2
		matrix `nutilde'[1,`j']=1/((1/`num'[1,`j']+1/`nuobs'[1,`j']))
	}
	* use all varnames
	local names: colnames(`Q1')
	matrix colnames `r'=`names'
	matrix colnames `nu'=`names'
	matrix colnames `lambda'=`names'
	matrix colnames `BIF'=`names'

	matrix colnames `gamma'=`names'
	matrix colnames `nuobs'=`names'
	matrix colnames `num'=`names'
	matrix colnames `nutilde'=`names'

	* Li, Raghunathan & Rubin (1991) estimates of T and nu1
	* for F test of Q=0 on k,nu1 degrees of freedom
	tempname r1 t1 BW TLRR
	matrix `BW'=`B'*syminv(`W')
	scalar `r1'=trace(`BW')*(1+1/`m')/`k'
	matrix `TLRR'=`W'*(1+`r1')
	scalar `t1'=`k'*(`m'-1)
	matrix colnames `Q'=`names'
	matrix rownames `T'=`names'
	matrix colnames `T'=`names'
	matrix rownames `B'=`names'
	matrix colnames `B'=`names'
	matrix rownames `TLRR'=`names'
	matrix colnames `TLRR'=`names'
}
di as text _n "Multiple imputation parameter estimates (`m' imputations)"
if "`lrr'"!="" {
	di as text "[Using Li-Raghunathan-Rubin (LRR) estimate of VCE matrix]"
	ereturn  post `Q' `TLRR', depname(`yname') obs(`nobs') esample(`touse') noclear
	ereturn  matrix T `T'
}
else {
	ereturn  post `Q' `T', depname(`yname') obs(`nobs') esample(`touse') noclear
	ereturn  matrix TLRR `TLRR'
}
if "`br'"=="" {
	ereturn  display, `eform'
	di as result `nobs' as text " observations (imputation 1)."
}
if "`infgain'"!="" {
	qui test `xvars' `cc' `left'
	scalar `df_1'=r(df)
	if missing(r(chi2)) scalar `chi2_1'=r(F)*`df_1'
	else scalar `chi2_1'=r(chi2)
	if reldif(`df_0',`df_1')>.001 di as txt _n "[cannot compute information gain, models have different dimension]"
	else di as txt _n "MI information gain = " as res %7.3f 100*(`chi2_1'-`chi2_0')/`chi2_0' as txt " percent." ///
	 " Sample size increase = " as res %7.3f 100*(`nobs'-`nold')/`nold' as txt " percent."
}
ereturn  matrix B `B'
ereturn  matrix W `W'
ereturn  matrix r `r'
ereturn  matrix nu `nu'
ereturn  matrix lambda `lambda'
ereturn  matrix BIF `BIF'

* Quantities for calculating df `nutilde' according to Barnard & Rubin (1999)
ereturn  matrix gamma `gamma'
ereturn  matrix nuobs `nuobs'
ereturn  matrix num `num'
ereturn  matrix nutilde `nutilde'

ereturn  scalar r1=`r1'
ereturn  scalar nu1=cond(`t1'>4, 4+(`t1'-4)*(1+(1-2/`t1')/`r1')^2, 0.5*`t1'*(1+1/`k')*(1+1/`r1')^2)
ereturn  scalar m=`m'
ereturn  scalar chi2=`chi2'
ereturn  scalar ll=`ell'
ereturn  scalar ll_0=`ell0'
ereturn  local eform `eform'
ereturn  local impid `impid'
ereturn  local cmd `cmd'
ereturn  local cmd2 `cmd'
ereturn  local micombine micombine
if "`br'"!="" {
	display_t
	di as result `nobs' as text " observations."
}
end

program define display_t
* Display results with t-statistics estimated according to Barnard & Rubin (1999)
	tempname V Q nu
	matrix `V'=e(V)
	matrix `Q'=e(b)
	matrix `nu'=e(nutilde)
	local yname `e(depvar)'
	local xs: colnames `Q'
	local k=colsof(`Q')
	di as text _n "Intervals and inference based on d.f. from Barnard & Rubin (1999)"
	di as txt "{hline 13}{c TT}{hline 64}"
	local t0 = abbrev("`yname'",12)
	if `"`e(eform)'"'!="" {
		local tt "Odds Ratio"
	}
	else {
		local tt "     Coef."
	}

	#delimit ;
	di as text
	%12s "`t0'" _col(14)"{c |}`tt'  Std. Err.     t   P>|t|  [$S_level% Conf. Intvl]     MI.df"
	_n "{hline 13}{c +}{hline 64}" ;
	#delimit cr
	tempname df mn se t p invt l u
	forvalues i=1/`k' {
		local x: word `i' of `xs'
		if "`x'"!="_cons" {
			local fmt : format `x'
			if substr("`fmt'",-1,1)=="f" {
				local fmt="%8."+substr("`fmt'",-2,2)
			}
			else if substr("`fmt'",-2,2)=="fc" {
				local fmt="%8."+substr("`fmt'",-3,3)
			}
			else local fmt "%8.0g"
			local fmt`i' `fmt'
		}
		else local fmt "%8.0g"
	        scalar `df' =`nu'[1,`i']
	        scalar `mn' = `Q'[1,`i']
	        scalar `se' = sqrt(`V'[`i',`i'])
	        scalar `t' = `mn'/`se'
	        scalar `p' = 2* ttail(`df', abs(`t'))
	        scalar `invt' = invttail(`df', (1-$S_level/100)/2)
	        scalar `l' = `mn' - `invt'*`se'
	        scalar `u' = `mn' + `invt'*`se'
	        if `"`e(eform)'"'!="" {
	        	scalar `mn' = exp(`mn')
	        	scalar `se' = `mn'*`se'
	        	scalar `l' = exp(`l')
	        	scalar `u' = exp(`u')
	        }
		if `df'>99999 {
			local fmtdf %9.2e
		}
		else local fmtdf %9.2f
		di as text /*
			*/  %12s abbrev("`x'",12)  _col(14) "{c |}" /*
			*/ _col(17)  as res `fmt'   `mn'	  /*
			*/ _col(27)  `fmt'  `se' /*
			*/ _col(36)   %7.2f	`t'  /*
			*/ _col(42)   %7.3f	`p'  /*
			*/ _col(52)  `fmt'  `l'  /*
			*/ _col(61)  `fmt'  `u'  /*
			*/ _col(70)  `fmtdf'   `df'
	}
	di as text "{hline 13}{c BT}{hline 64}"
end

program define chkrowid, sclass
local I: char _dta[mi_id]
if "`I'"=="" {
	di as error "no row-identifier variable found - data may have incorrect format"
	exit 198
}
cap confirm var `I'
local rc=_rc
if `rc' {
	di as error "row-identifier variable `I' not found"
	exit `rc'
}
sret local I `I'
end