* EC771: univariate simulation experiment    cfb 4113

capt log close
log using 771simul1, replace
set more off
local reps 1000

* set up model: y1 contains a homoskedastic error
*               y2 contains an error term which is heteroskedastic across regions


* define the simulation experiment
capture program drop mcsimul1
program define mcsimul1, rclass
     version 8.0
     syntax [, c(real 1)]
     tempvar e1 e2

     gen `e1' = invnorm(uniform())*`c'*zmu
     gen `e2' = invnorm(uniform())*`c'*z_factor
     replace y1 = true_y + `e1'
     replace y2 = true_y + `e2'
	 summ y1
     return scalar mu1 = r(mean)
     return scalar se_mu1 = r(sd)/sqrt(r(N))
     summ y2
     return scalar mu2 = r(mean)
     return scalar se_mu2 = r(sd)/sqrt(r(N))
     return scalar c = `c'
end
	
	forv i=1/10 {
* 50 states' data from US Census
		webuse census2, clear
* true_y indicates the variable being used as base for the noisy measurement
		gen true_y = age
* z_factor is the variable generating heteroskedasticity in e2
		gen z_factor = region
		sum z_factor, meanonly
		scalar zmu = r(mean)
		qui gen y1 = .
		qui gen y2 = .
		local c = `i'/10
		simulate "mcsimul1, c(`c')" c= r(c) mu1=r(mu1) se_mu1=r(se_mu1) mu2=r(mu2) ///
		se_mu2=r(se_mu2), saving(c`i') reps(`reps') replace
		}
	
* combine the results files into a single file for tabulation
use c1
forv i=2/10 {
	append using c`i'
	}
gen het_infl = se_mu2 / se_mu1
save c_1_5,replace

* get descriptives on results file
tabstat mu1 se_mu1 mu2 se_mu2 het_infl, stat(mean) by(c)

tabstat het_infl, stat(mean q iqr) by(c)

set more on
log close