
* April 29, 2025
*
* GMSS - Generalized Model Specification System
* James W. Hardin, Taylor A. Hardin, and Conner D. Hardin
*
* Examples showing the use of -gmss2stage- to calculate 
* two-stage models with Murphy-Topel variance estimates.

// Start from a clean slate: forget every program currently in memory and
// silence tracing, return-message timing, and -more- pauses.
program drop _all
set more off
set rmsg off
set trace off


// `header' is appended as an option to the stage-2 estimation commands.
// Change to -<empty>- to add extra output
local header noheader

// `showcomp' prefixes every call to ShowComp.
// Change to -*- to eliminate display results from citation
local showcomp


capture program drop ShowComp
program define ShowComp 
	// ShowComp <number>
	//
	// Displays the coefficient table published in the A. R. Hole citation
	// for example <number> (1-7) so it can be compared by eye with the
	// results printed just above it.  Each row holds seven words:
	//   name  coef  std.err  z  p-value  ci-lower  ci-upper
	// Nothing is displayed when <number> is outside 1-7 because no row
	// macros get defined.
	args number
	
	// Copy-Pasted results from the A. R. Hole citation for the examples

	if `number' == 1 {
		local l1 age .0731059 .1096293 0.67 0.505 -.1417636 .2879755
		local l2 income .0452336 .4375397 0.10 0.918 -.8123285 .9027957
		local l3 avgexp -.0068969 .004265 -1.62 0.106 -.0152561 .0014623
		local l4 zhat 4.632355 10.82669 0.43 0.669 -16.58757 25.85228
		local l5 _cons -6.319947 9.661564 -0.65 0.513 -25.25626 12.61637
	}
	if `number' == 2 {
		local l1 age .0731059 .0542458 1.35 0.178 -.0332139 .1794258
		local l2 income .0452336 .1741114 0.26 0.795 -.2960184 .3864856
		local l3 avgexp -.0068969 .00202 -3.41 0.001 -.0108561 -.0029378
		local l4 zhat 4.632355 3.661774 1.27 0.206 -2.54459 11.8093
		local l5 _cons -6.319947 3.930768 -1.61 0.108 -14.02411 1.384217
	}
	if `number' == 3 {
		local l1 age .040167 .0375665 1.07 0.285 -.0334619 .1137959
		local l2 income .1221488 .1441061 0.85 0.397 -.1602941 .4045916
		local l3 avgexp -.0023466 .0010854 -2.16 0.031 -.0044739 -.0002192
		local l4 zhat 2.152821 2.385346 0.90 0.367 -2.522371 6.828014
		local l5 _cons -3.8865 2.604024 -1.49 0.136 -8.990293 1.217293
	}
	if `number' == 4 {
		local l1 age .0803012 .1509582 0.53 0.595 -.2155714 .3761738
		local l2 income .0397158 .5221716 0.08 0.939 -.9837218 1.063153
		local l3 avgexp -.0068861 .0047102 -1.46 0.144 -.0161178 .0023457
		local l4 zhat 5.393431 14.91054 0.36 0.718 -23.83068 34.61755
		local l5 _cons -7.094363 13.68211 -0.52 0.604 -33.9108 19.72207
	}
	if `number' == 5 {
		local l1 age .1097948 .4069624 0.27 0.787 -.6878369 .9074264
		local l2 income -.0550747 1.280603 -0.04 0.966 -2.565009 2.45486
		local l3 avgexp -.0068635 .0061429 -1.12 0.264 -.0189034 .0051765
		local l4 zhat 7.46005 34.49451 0.22 0.829 -60.14795 75.06805
		local l5 _cons -9.27511 33.76454 -0.27 0.784 -75.45239 56.90217
	}
	if `number' == 6 {
		local l1 age .107657 .1097165 0.98 0.326 -.1073833 .3226973
		local l2 income .0209116 .3621894 0.06 0.954 -.6889665 .7307897
		local l3 avgexp -.005743 .0023503 -2.44 0.015 -.0103495 -.0011365
		local l4 zhat 6.469631 7.848509 0.82 0.410 -8.913164 21.85243
		local l5 _cons -8.807249 8.353285 -1.05 0.292 -25.17939 7.564889
		local l6 lnalpha 1.15111 .5468807 2.10 0.035 .0792434 2.222976
	}
	if `number' == 7 {
		local l1 age .0415961 .0383581 1.08 0.278 -.0335844 .1167766
		local l2 income .1451392 .1519067 0.96 0.339 -.1525924 .4428708
		local l3 avgexp -.0028311 .0011394 -2.48 0.013 -.0050644 -.0005978
		local l4 zhat 2.551639 2.640499 0.97 0.334 -2.623645 7.726922
		local l5 cut1 4.237672 2.859636 1.48 0.138 -1.367112 9.842456
		local l6 cut2 4.799178 2.871063 1.67 0.095 -.8280026 10.42636
	}

	// Right-hand edge (display column) of each of the seven fields
	local k1 13
	local k2 26
	local k3 37
	local k4 46
	local k5 54
	local k6 67
	local k7 79

	// The upper bound exceeds the largest row count (6) on purpose; rows
	// that were never defined expand to "" and are skipped.
	forvalues k=1/8 {
		// Balanced macro quotes: expand `k' first, then the row macro l<k>
		if "`l`k''" != "" {
			// Build one -display- command; the first field is shown as
			// text, every later field as a result.
			local str "noi di as txt"
			forvalues i=1/7 {
				local item : word `i' of `l`k''
				// Right-justify: start column = right edge - field width
				local c`i' = `k`i'' - length("`item'")
				local str `str' _col(`c`i'') `"`item'"' as res
			}
			`str'
		}
	}
end




use dta/greene, clear
/*
Table F9.1: Income and Expenditure Data. 100 Cross Section Observations
Source: Greene (1992)

Variable	Information
mdr 		Number of derogatory reports
acc			Credit card application acceptance indicator			
age			Age in years + 12ths of years
income		Income divided by 10,000
expend		Average monthly credit card expenditure
ownrent		Owns (1) or rents (0) their home
selfemp		Self-employed indicator

NOTE(review): the stage-2 covariate list below uses -avgexp-, not -expend-;
confirm which name the dataset actually carries and update this table.
*/

// Derived responses.  Stata treats missing as larger than any number, so
// every comparison is guarded with missing() (which also catches extended
// missing values .a-.z); observations with missing mdr stay missing.
gen byte mdrgt0 = (mdr > 0) if !missing(mdr)		// Binary version of the count
gen mdr3lvl = min(mdr + 1, 3) if !missing(mdr)	// 3-level version of the count


// Clear the GMSS system and define a set of optimization options
gmss clear 
gmss opt d2, evaluator(d2) maxiter(40) search(on)




/*
 Examples to run from 
	A. R. Hole (2006) Calculating Murphy-Topel Variance
	Estimates in Stata: A Simplified Procedure, 
	The Stata Journal, 6(4), 521-529
	
	1) Logit	(acc|x1) -> Poisson			(mdr|mu_logit x2)		(M-T)
	2) Logit	(acc|x1) -> Poisson			(mdr|mu_logit x2)		(Naive)
	3) Logit	(acc|x1) -> Probit			(mdrgt0|mu_logit x2)	(M-T)
	4) Probit	(acc|x1) -> Poisson			(mdr|mu_probit x2)		(M-T)
	5) Regress	(acc|x1) -> Poisson			(mdr|mu_reg x2)			(M-T)
	6) Logit	(acc|x1) -> Neg. binomial2	(mdr|mu_logit x2)		(M-T)
	7) Probit	(acc|x1) -> Ordered probit	(mdr3lvl|mu_probit x2)	(M-T)
	
	
	Note that example 5 is fit by OLS (-regress-) in the cited manuscript,
	whereas here it is fit by maximum likelihood Gaussian regression.  The
	results differ slightly because:
		1) Sigma is part of the parameter vector for GMSS
		2) The variance estimate divides by n under ML (GMSS) but by
		   (n-p) under OLS regression
	Even so, the final results are close.
*/


// ---------------------------------------------------------------------
// Stage 1: three alternative first-stage models for the same response.
// Each one is defined (link -> dist -> init), run quietly, and its
// prediction is stored in mu_<model> for use in stage 2.
// ---------------------------------------------------------------------

// GMSS optimization options
local opt_opts d2

// Stage 1: dependent variable
local y1 acc
// Stage 1, Parm 1: covariates
local x1 age income ownrent selfemp


// Stage 1 - Logit, then Probit (Bernoulli response; only the link differs)
foreach lnk in logit probit {
	gmss link acc_`lnk', y(`y1') x(`x1') name(`lnk')
	gmss dist `lnk', name(bernoulli) link(acc_`lnk')
	gmss init s1_`lnk', dist(`lnk') opt(`opt_opts') nowarning
	gmss run s1_`lnk', nolog noheader notable
	predict double mu_`lnk'
}

// Stage 1 - Regress (normal response: identity link for the mean plus a
// log link for sigma)
gmss link acc_reg, y(`y1') x(`x1') name(identity)
gmss link sigma, name(log)
gmss dist reg, name(normal) link(acc_reg sigma)
gmss init s1_reg, dist(reg) opt(`opt_opts') nowarning
gmss run s1_reg, nolog noheader notable
predict double mu_reg


// ---------------------------------------------------------------------
// Stage 2: responses, covariates, and link definitions
// ---------------------------------------------------------------------
local y2_cnt   mdr
local y2_bin   mdrgt0
local y2_3lvl  mdr3lvl
local x2 age income avgexp

// Log links for the count response, one per stage-1 prediction:
// mdr_logl (mu_logit), mdr_logp (mu_probit), mdr_logr (mu_reg)
foreach fs in logit probit reg {
	local sfx = substr("`fs'", 1, 1)
	gmss link mdr_log`sfx', y(`y2_cnt') x(`x2' mu_`fs') name(log)
}

// Probit links for the binary and the 3-level responses
gmss link mdr_bin, y(`y2_bin') x(`x2' mu_logit) name(probit)
gmss link mdr_prob3, y(`y2_3lvl') x(`x2' mu_probit) cons(off) name(probit)

// Constant-only links: ancillary parameter and the two cut points
gmss link sigma, cons(on) name(log)
gmss link k1, cons(on) name(identity) label(_cut1)
gmss link k2, cons(on) name(identity) label(_cut2)


// ---------------------------------------------------------------------
// Examples.  The statement order is significant: the distribution name
// -poisson- is redefined before examples 4 and 5.
// ---------------------------------------------------------------------

// Example 1
gmss dist poisson, name(poisson) link(mdr_logl)
gmss init s2_ex1, dist(poisson) opt(`opt_opts') nowarning
gmss2stage s1_logit s2_ex1, zhat(mu_logit) `header'
`showcomp' ShowComp 1


// Example 2
gmss run s2_ex1, nolog `header'
`showcomp' ShowComp 2

// Example 3
gmss dist bernoulli, name(bernoulli) link(mdr_bin)
gmss init s2_ex3, dist(bernoulli) opt(`opt_opts') nowarning
gmss2stage s1_logit s2_ex3, zhat(mu_logit) `header'
`showcomp' ShowComp 3

// Example 4
gmss dist poisson, name(poisson) link(mdr_logp)
gmss init s2_ex4, dist(poisson) opt(`opt_opts') nowarning
gmss2stage s1_probit s2_ex4, zhat(mu_probit) `header'
`showcomp' ShowComp 4

// Example 5 : Note we do ML Gaussian regression
gmss dist poisson, name(poisson) link(mdr_logr)
gmss init s2_ex5, dist(poisson) opt(`opt_opts') nowarning
gmss2stage s1_reg s2_ex5, zhat(mu_reg) `header'
`showcomp' ShowComp 5

// Example 6
gmss dist nbin2, name(nbinomial2) link(mdr_logl sigma)
gmss init s2_ex6, dist(nbin2) opt(`opt_opts') nowarning
gmss2stage s1_logit s2_ex6, zhat(mu_logit) `header'
`showcomp' ShowComp 6

// Example 7
gmss dist oprobit, name(ordered) link(mdr_prob3 k1 k2)
gmss init s2_ex7, dist(oprobit) opt(`opt_opts') nowarning
gmss2stage s1_probit s2_ex7, zhat(mu_probit) `header'
`showcomp' ShowComp 7

