***Script for illustrating Central Limit Theorem***

***
//AUTHOR: Marshall A. Taylor
//DATE: January 14, 2018
//NOTE: The goal is to show that the standard error estimated
	//from a single random variable is nearly equivalent to
	//the standard deviation of an empirically-derived sampling
	//distribution, regardless of how the random variable is
	//distributed. Good tutorial for illustrating how the 
	//denominator in the s.e. equation--sqrt(n)--work in
	//practice.
***

program define sdist
version 13.1
preserve

syntax , [samples(real 200) obs(real 500) type(string) par1(real 0) ///
	par2(real 1) round(real 0.001) histplot saveplot1(string) saveplot2(string) ///
	combine repplot lcolor(string) fcolor(string) ///
	bckg(string) nlcolor(string) nlwidth(real .5) nlpattern(string) dots]

qui {

describe
if r(changed)!=0 | r(k)!=0 | r(N)!=0 {
	display as error "Save and/or clear existing data before running -sdist-."
	exit 4
	}

if "`lcolor'"=="" {
	local lcolor "black"
	}
if "`nlcolor'"=="" {
	local nlcolor "black" 
	}
if "`fcolor'"=="" {
	local fcolor "gs6" 
	}
if "`bckg'"=="" {
	local bckg "white" 
	}
if "`nlpatterb'"=="" {
	local nlpattern "solid"
	}

if "`dots'"!="" {
nois _dots 0, title(Preparing for simulation) reps(`samples')
forvalues k = 1/`samples' { //Generate empty variables. Increasing this will
	gen var`k'=.          //result in a more normal sampling distribution.
	nois _dots `k' 0	//Just be sure to adjust var* text in loops below.
	}
}

if "`dots'"=="" {
forvalues k = 1/`samples' {
	gen var`k'=.
	}
}	
	
if "`type'"=="" local type "`par1'+(`par2'-`par1')*runiform()" //default
if "`type'"=="uniform" local type "`par1'+(`par2'-`par1')*runiform()"
if "`type'"=="normal" local type "rnormal(`par1',`par2')"
if "`type'"=="poisson" local type "rpoisson(`par2')"

if "`dots'"!="" {
nois _dots 0, title(Creating `samples' random samples with `obs' observations) ///
	reps(`samples')
foreach i of varlist var1-var`samples' { //Use K (above) to generate K random variables
	set obs `obs'                        //from a distribution w/ n each.
	gen `i'_r = `type'
	sum `i'_r
	gen `i'_mean=r(mean)
	nois _dots `i' 0
	}
}

if "`dots'"=="" {
foreach i of varlist var1-var`samples' { 
	set obs `obs'                        
	gen `i'_r = `type'
	sum `i'_r
	gen `i'_mean=r(mean)
	}
}

if "`dots'"!="" {
nois _dots 0, title(Creating dataset of random samples) reps(`samples')
foreach m of varlist var1-var`samples' { //Drop empty variables used to set random
    nois _dots `m' 0                    //random variables.
	drop `m'
	} 
}

if "`dots'"=="" {
foreach m of varlist var1-var`samples' {
	drop `m'
	}
}

rename var1_r x //Save one set of sample estimates for later comparison to the
drop *_r       //empirically-derived sampling distribution.
xpose, clear varname

local a=`samples'+1

sum v1 in 2/`a' //Getting empirically-derived mean and standard deviation of
local sa_mean=round(r(mean),`round') //the sampling distribution.
local sa_sd=round(r(sd),`round')

if "`saveplot1'"!="" {
if "`repplot'"!="" {
if "`histplot'"!="" {
	hist v1 in 2/`a', freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Empirical Sampling Distribution of `samples' X-bars" ///
		"{&mu}{sub:x-bar} = `sa_mean'; {&sigma}{sub:X-bar} = `sa_sd'") ///
		graphregion(fcolor(`bckg')) saving(`saveplot1', replace) name(plot1, replace)
				}
			}
		}

if "`saveplot1'"=="" {
if "`repplot'"!="" {
if "`saveplot2'"=="" {
if "`histplot'"!="" {
	hist v1 in 2/`a', freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Empirical Sampling Distribution of `samples' X-bars" ///
		"{&mu}{sub:x-bar} = `sa_mean'; {&sigma}{sub:X-bar} = `sa_sd'") ///
		graphregion(fcolor(`bckg')) saving(plot1.gph, replace) name(plot1, replace)
					}
				}
			}
		}
		
if "`saveplot1'"=="" {
if "`repplot'"!="" {
if "`saveplot2'"!="" {
if "`histplot'"!="" {
	hist v1 in 2/`a', freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Empirical Sampling Distribution of `samples' X-bars" ///
		"{&mu}{sub:x-bar} = `sa_mean'; {&sigma}{sub:X-bar} = `sa_sd'") ///
		graphregion(fcolor(`bckg')) name(plot1, replace)
					}
				}
			}
		}
		
if "`saveplot1'"=="" {
if "`repplot'"=="" {
if "`histplot'"!="" {
	hist v1 in 2/`a', freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Empirical Sampling Distribution of `samples' X-bars" ///
		"{&mu}{sub:x-bar} = `sa_mean'; {&sigma}{sub:X-bar} = `sa_sd'") ///
		graphregion(fcolor(`bckg')) name(plot1, replace)
				}
			}
		}
	
if "`saveplot1'"!="" {
if "`repplot'"=="" {
if "`histplot'"!="" {
	hist v1 in 2/`a', freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Empirical Sampling Distribution of `samples' X-bars" ///
		"{&mu}{sub:x-bar} = `sa_mean'; {&sigma}{sub:X-bar} = `sa_sd'") ///
		graphregion(fcolor(`bckg')) saving(`saveplot1') name(plot1, replace)
				}
			}
		}

xpose, clear varname

ci x //Getting standard error estimate from a single sample.
local x_se=round(r(se),`round')
sum x
local x_mean=round(r(mean),`round')
local x_sd=round(r(sd),`round')

local diff = round(abs(`sa_sd'-`x_se'),`round')

if "`saveplot2'"!="" {
if "`repplot'"!="" {
if "`histplot'"!="" { 
	hist x, freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Distribution of a Single X" ///
		"X-bar = `x_mean'; {it:s} = `x_sd'; se{sub:X-bar} = `x_se'") ///
		graphregion(fcolor(`bckg')) saving(`saveplot2', replace) name(plot2, replace)
				}
			}
		}

if "`saveplot2'"=="" {
if "`repplot'"!="" {
if "`saveplot1'"=="" {
if "`histplot'"!="" {
	hist x, freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Distribution of a Single X" ///
		"X-bar = `x_mean'; {it:s} = `x_sd'; se{sub:X-bar} = `x_se'") ///
		graphregion(fcolor(`bckg')) saving(plot2.gph, replace) name(plot2, replace)
					}		
				}
			}
		}
		
if "`saveplot2'"=="" {
if "`repplot'"!="" {
if "`saveplot1'"!="" {
if "`histplot'"!="" {
	hist x, freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Distribution of a Single X" ///
		"X-bar = `x_mean'; {it:s} = `x_sd'; se{sub:X-bar} = `x_se'") ///
		graphregion(fcolor(`bckg')) name(plot2, replace)
					}		
				}
			}
		}
		
if "`saveplot2'"=="" {
if "`repplot'"=="" {
if "`histplot'"!="" {
	hist x, freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Distribution of a Single X" ///
		"X-bar = `x_mean'; {it:s} = `x_sd'; se{sub:X-bar} = `x_se'") ///
		graphregion(fcolor(`bckg')) name(plot2, replace)
				}		
			}
		}	

if "`saveplot2'"!="" {
if "`repplot'"=="" {
if "`histplot'"!="" {
	hist x, freq normal fcolor(`fcolor') lcolor(`lcolor') ///
		normopts(lcolor(`nlcolor') lwidth(`nlwidth') lpattern(`nlpattern')) ///
		xtitle("Distribution of a Single X" ///
		"X-bar = `x_mean'; {it:s} = `x_sd'; se{sub:X-bar} = `x_se'") ///
		graphregion(fcolor(`bckg')) saving(`saveplot2') name(plot2, replace)
				}		
			}
		}	
	
mat S = J(3,1,.)
mat S[1,1] = `sa_sd'
mat S[2,1] = `x_se'
mat S[3,1] = `diff'
mat rownames S = sig_Xb se_Xb abs(diff)
mat colnames S = sd/se
noisily: matlist(S)  
noisily: disp ""               
noisily: disp "The difference between sig_Xb and se_Xb is `diff'. The larger"
noisily: disp "this difference, the poorer the single X variable standard error approximates"
noisily: disp "the standard deviation of the sampling distribution. This may be due to one"
noisily: disp "of two things: a small number of samples and/or a small sample size."

if "`combine'"!="" {
if "`saveplot1'"!="" & "`saveplot2'"!="" {
gr combine `saveplot1' `saveplot2' , ///
	col(1) imargin(0 0 0 0) graphregion(margin(l=22 r=22) fcolor(white))
		}
	}

if "`combine'"!="" {
if "`repplot'"!="" {
if "`saveplot1'"=="" & "`saveplot2'"=="" {
gr combine plot1.gph plot2.gph, ///
	col(1) imargin(0 0 0 0) graphregion(margin(l=22 r=22) fcolor(white)) 
			}
		}
	}

if "`combine'"!="" {
if "`saveplot1'"!="" & "`saveplot2'"=="" {
gr combine `saveplot1' plot2.gph, ///
	col(1) imargin(0 0 0 0) graphregion(margin(l=22 r=22) fcolor(white)) 
		}
	}	

if "`combine'"!="" {
if "`saveplot1'"=="" & "`saveplot2'"!="" {
gr combine plot1.gph `saveplot2' , ///
	col(1) imargin(0 0 0 0) graphregion(margin(l=22 r=22) fcolor(white)) 
		}
	}	

if "`combine'"!="" {
if "`histplot'"!="" {
if "`saveplot1'"=="" & "`saveplot2'"=="" & "`repplot'"=="" {
	disp as error "Need to save plots before combining them."
	exit 302
			}
		}
	}

if "`combine'"!="" {
if "`histplot'"=="" {
	disp as error "No plots were created to combine. Specify -histplot-."
	exit 302
		}
	}
	
clear	
}
end