capture program drop clusterbs
program define clusterbs, eclass

version 9.0									/* Ensures that the Stata version is 9.0 or above to run Mata and bootstrap */
	
gettoken modname 0 : 0                    	/* Strips the model name from the input (may not be needed) */
	
syntax varlist(ts) [, fe(string) cluster(name) reps(int 1000) seed(int 54321) festruc(string)] 		/* Gets the variable list, model name and cluster variable from the input */
	
gettoken depvar rhsvars : varlist				/* Strips the variables into right and left hand sides (may not be needed) */
	
local cmdline `modname' `depvar' `rhsvars'  	/* Sets `cmdline' to be the model specification */
	
set matsize 800								/* Increases the matrix size for bigger models (max for Stata IC is 800) */

preserve

if substr("`modname'", 1, 2) == "xt" {
	display "Program clusterbs will not work with models using the xt- prefix.  To run fixed effects models, specify fe() as an option."
	display "See the help file for how to use fixed effects properly with this program."
}
else { 
	quietly `cmdline', cluster(`cluster')  			/* Runs the initial model with robust clustered SEs */
}       

capture drop flag
qui gen flag = e(sample)				/* Marks the observations actually used in the model */
	
set more off

/* Save sigma_u, sigma_e, rho */

display "Cluster variable is " "`cluster'"

/* Sorting and counting the number of clusters */
	quietly drop if flag==0								/* Drops all observations not used in the initial model */
	
	capture drop neworder
	generate neworder = _n
	
	sort `cluster' neworder 					/* Sorts by cluster for dataset */
	
	capture drop ucluster
    by `cluster': gen ucluster = _n == 1 	/* Gives a 1 to the first observation in each cluster */
    
    sort neworder								/* Sorts the data back as it was */
    
	capture drop cluster_num
	gen cluster_num = sum(ucluster)				/* Gives a increasing number to each observation in each unique cluster */

	capture drop ucluster
	capture drop neworder  
	
	capture drop total_clusters
	egen total_clusters = max(cluster_num)  	/* Counts the total number of clusters */
    local clustnum = total_clusters[1]			/* Stores this variable as local clustnum */
   	capture drop total_clusters
    	
	display "Number of clusters for model is " `clustnum'

if ("`fe'" == "external" | "`fe'" == "inside") {

		capture drop new_fe
		if "`fe'" == "external" {
			quietly egen new_fe = group(`festruc')
			quietly xtset new_fe
		}
		if "`fe'" == "inside" {
			quietly egen new_fe = group(`festruc' cluster_num)
			quietly xtset new_fe
		}
		
	if "`modname'" == "regress" {
		quietly xtreg `depvar' `rhsvars', fe vce(cluster `cluster')
		local sigma_u = e(sigma_u)
		local sigma_e = e(sigma_e)
		local rho = e(rho)
	}

	if "`modname'" == "logit" {
		quietly xtlogit `depvar' `rhsvars', fe
		local sigma_u = e(sigma_u)
		local sigma_e = "NA"
		local rho = e(rho)
	}

	if "`modname'" == "poisson" {
		quietly xtpoisson `depvar' `rhsvars', fe
		local sigma_u = "NA"
		local sigma_e = "NA"
		local rho = "NA"
	}

	if "`modname'" == "nbreg" {
		quietly xtnbreg `depvar' `rhsvars', fe
		local sigma_u = "NA"
		local sigma_e = "NA"
		local rho = "NA"
	}
	
	if ("`modname'" != "regress" & "`modname'" != "logit" & "`modname'" != "poisson" & "`modname'" != "nbreg") {
		display "Error: Cluster fixed effects can only be used with the following models:"
		display "regress, logit, poisson, and nbreg"
		exit
	}
}
else {
	if "`fe'" == "cluster" {						/* If the model uses fixed effects, then it runs */
											/* the model as an xt- model and saves the estimates */
		quietly xtset `cluster'
	
		if "`modname'" == "regress" {
			quietly xtreg `depvar' `rhsvars', fe vce(cluster `cluster')
			local sigma_u = e(sigma_u)
			local sigma_e = e(sigma_e)
			local rho = e(rho)
		}

		if "`modname'" == "logit" {
			quietly xtlogit `depvar' `rhsvars', fe
			local sigma_u = e(sigma_u)
			local sigma_e = "NA"
			local rho = e(rho)
		}

		if "`modname'" == "poisson" {
			quietly xtpoisson `depvar' `rhsvars', fe
			local sigma_u = "NA"
			local sigma_e = "NA"
			local rho = "NA"
		}

		if "`modname'" == "nbreg" {
			quietly xtnbreg `depvar' `rhsvars', fe
			local sigma_u = "NA"
			local sigma_e = "NA"
			local rho = "NA"
		}
	
		if ("`modname'" != "regress" & "`modname'" != "logit" & "`modname'" != "poisson" & "`modname'" != "nbreg") {
			display "Error: Cluster fixed effects can only be used with the following models:"
			display "regress, logit, poisson, and nbreg"
			exit
		}
	}
	else {
		quietly `cmdline', cluster(`cluster')         /* Runs the initial model with robust clustered SEs */
		local sigma_u = "NA"
		local sigma_e = "NA"
		local rho = "NA"
	}
}

matrix estimates = e(b)
local beta_names : colfullnames(estimates)
local num_betas = wordcount("`beta_names'")

matrix main_est = J(`num_betas', 3, .)
matrix rownames main_est = `beta_names'
matrix colnames main_est = Coef StdErr t-value

if ("`sigma_u'"	== "NA" & "`sigma_e'" == "NA" & "`rho'" == "NA") {
	matrix final = J(`num_betas', 4, .)
	matrix rownames final = `beta_names'
	matrix colnames final = Coefficient Prob>|t| 95%_CI_low 95%_CI_high
}
else {
	if "`sigma_e'" == "NA" {
		matrix final = J((`num_betas' + 2), 4, .)
		local row_names = "`beta_names'" + " sigma_u rho"
		matrix rownames final = `row_names'
		matrix colnames final = Coefficient Prob>|t| 95%_CI_low 95%_CI_high
		matrix final[(`num_betas' + 1), 1] = `sigma_u'
		matrix final[(`num_betas' + 2), 1] = `rho'
	}
	else {
		matrix final = J((`num_betas' + 3), 4, .)
		local row_names = "`beta_names'" + " sigma_u sigma_e rho"
		matrix rownames final = `row_names'
		matrix colnames final = Coefficient Prob>|t| 95%_CI_low 95%_CI_high
		matrix final[(`num_betas' + 1), 1] = `sigma_u'
		matrix final[(`num_betas' + 2), 1] = `sigma_e'
		matrix final[(`num_betas' + 3), 1] = `rho'
	}
}

if _N < `reps' {
	qui set obs `reps'
}

local b = 1	
while `b' < (`num_betas' + 1) {					/* Runs a loop over the number of parameters */
	if `b' < `num_betas' gettoken var`b' beta_names: beta_names				/* Gets the variable name for each variable from betanames */
	else gettoken var`b': beta_names

	matrix main_est[`b', 1] = _b[`var`b'']
	matrix main_est[`b', 2] = _se[`var`b'']
	matrix main_est[`b', 3] = abs(_b[`var`b''] / _se[`var`b''])
	
	matrix final[`b', 1] = _b[`var`b'']
	
	if substr("`var`b''", 1, 2) == "o." {
		display "Error: Please remove variables that are collinear with the fixed effects "
		display "and re-run the model."
		exit
	}
	
	capture drop w_`var`b''	
	capture drop beta_`var`b''
	quietly {
		gen w_`var`b'' = .									/* Generates a new variable for each IV to store the w bootstrap estimates */
		gen beta_`var`b'' = .								/* Generates a new variable for each IV to store the beta bootstrap estimates */
	}
	local ++b														/* Iterates the loop */
}


tempfile main
quietly save `main', replace				/* Saves the original data for use later */

capture drop new_clustnum

di "Starting `reps' Bootstrap Replications"

set seed `seed'

forvalues k = 1/`reps' {
	di "." _continue
	
	quietly {
		
		bsample, cluster(`cluster') idcluster(new_clustnum)	  /* Creates a pairs cluster bootstrap sample with a new cluster variable name */
		
		if ("`fe'" == "external" | "`fe'" == "inside") {
			
			capture drop new_fe
			if "`fe'" == "external" {
				quietly gen new_fe = `festruc'
				quietly xtset new_fe
			}
			if "`fe'" == "inside" {
				quietly egen new_fe = group(`festruc' new_clustnum)
				quietly xtset new_fe
			}
			
			if "`modname'" == "regress" {
					capture xtreg `depvar' `rhsvars', fe vce(cluster new_clustnum)
				}

				if "`modname'" == "logit" {
					capture xtlogit `depvar' `rhsvars', fe
				}

				if "`modname'" == "poisson" {
					capture xtpoisson `depvar' `rhsvars', fe
				}

				if "`modname'" == "nbreg" {
					capture xtnbreg `depvar' `rhsvars', fe
				}
		}
		else {
			if "`fe'" == "cluster" {							/* This section resets the xtset using the new clustering indicator for each sample */
				quietly xtset new_clustnum
		
				if "`modname'" == "regress" {
					capture xtreg `depvar' `rhsvars', fe vce(cluster new_clustnum)
				}

				if "`modname'" == "logit" {
					capture xtlogit `depvar' `rhsvars', fe
				}

				if "`modname'" == "poisson" {
					capture xtpoisson `depvar' `rhsvars', fe
				}

				if "`modname'" == "nbreg" {
					capture xtnbreg `depvar' `rhsvars', fe
				}
			}
			else {
				quietly `cmdline', cluster(new_clustnum)         /* Runs the initial model with robust clustered SEs */
			}
		}
	}
	
	local b = 1
	while `b' < (`num_betas' + 1) {					/* Runs a loop over the number of parameters */
		if _rc != 0 {							/* If the model didn't run, this should store null values so the itration isn't counted */
			local beta_`var`b''_`k' = .					
			local w_`var`b''_`k' = .
		}
		else {
			capture {
				local beta_`var`b''_`k' = _b[`var`b'']					/* Quietly executes so that if a variable is excluded, it continues */	
				local w_`var`b''_`k' = (_b[`var`b''] - main_est[`b', 1]) / _se[`var`b''] 
			}
			if _rc != 0 {						/* Stores a null for the all the coefficients if any one variable is excluded from the model... */
				local q = 1						/* this should act to simply exclude any "bad" iterations of the bootstrapping */
				while `q' < (`num_betas' + 1) {
					local beta_`var`q''_`k' = .
					local w_`var`q''_`k' = .
					local ++q
				}
				continue, break					/* Breaks the loop over the variables (b) and continues to the constant and next iteration of k */
			}	
		}	
		local ++b														/* Iterates the loop */
	}	
	quietly use `main', replace					/* Restores the initial dataset */
}
di ""
di "Bootstrap iterations completed.  Now storing model results..."

forvalues k = 1/`reps' {
	local b = 1
	
	while `b' < (`num_betas' + 1) {
		quietly {
			replace beta_`var`b'' = `beta_`var`b''_`k'' in `k'			/* Stores the beta for each iteration and variable */
			replace w_`var`b'' = abs(`w_`var`b''_`k'') in `k'			/* Takes the absolute value of the w (t) statistic for each iteration and variable and stores it */
		}																
		local ++b
	}															
}

qui count if (w__cons != .)
local iter = r(N)
local fail = `reps' - `iter'

di "The model ran succesfully and stored results in `iter' bootstrap iterations."
if `fail' != 0 {
	di "`fail' bootstrap iterations failed to store results."
	di "If more than 10% of iterations failed, consider using a different model or method."
}

local b = 1
while `b' < (`num_betas' + 1) {
	
	qui count if (w_`var`b'' > main_est[`b', 3] & w_`var`b''!=.)
	local percent = r(N)
	
	local pval_`var`b'' = `percent' / `iter'			/* Generates the p-value comparing the initial model's w to the bootstrapped distribution of w */
	
	matrix final[`b', 2] = `pval_`var`b'''
	
	quietly {
		_pctile w_`var`b'', p(95)							/* Generates percentiles for the bootstrapped w__cons variable to obtain 95% CI */
		local par_`var`b'' = main_est[`b', 1]
		local low_`var`b'' = `par_`var`b''' - (r(r1) * main_est[`b', 2])			/* Calculates 95% CIs using the bootstrap-t method given in http://www.stata-journal.com/sjpdf.html?articlenum=st0073 (pp. 315-16) */
		local high_`var`b'' = `par_`var`b''' + (r(r1) * main_est[`b', 2])			/* and in http://kurt.schmidheiny.name/teaching/bootstrap2up.pdf (pp. 5-7) */
	}
	
	matrix final[`b', 3] = `low_`var`b'''
	matrix final[`b', 4] = `high_`var`b'''
	
	local ++b
}

capture estout matrix(final, fmt(%10.0g %12.1f %10.0g %10.0g))
if _rc != 0 {
	display "******************************* ERROR ********************************"
	display "Program estout.ado not found.  This program is needed to display results."
	display "To download estout, type in command line: SSC install estout"
}


if `iter' <= 500 estout matrix(final, fmt(%10.0g %12.1f %10.0g %10.0g)), style(smcl) title("Model Results") mlabels("", none) modelwidth(12 8 12 12) 
else {
	if `iter' <= 1000 estout matrix(final, fmt(%10.0g %12.2f %10.0g %10.0g)), style(smcl) title("Model Results") mlabels("", none) modelwidth(12 8 12 12)
	else {
		if `iter' <= 5000 estout matrix(final, fmt(%10.0g %12.3f %10.0g %10.0g)), style(smcl) title("Model Results") mlabels("", none) modelwidth(12 8 12 12)
		else estout matrix(final, fmt(%10.0g %12.4f %10.0g %10.0g)), style(smcl) title("Model Results") mlabels("", none) modelwidth(12 8 12 12)
	}
}


if "`fe'" == "cluster"	di "NOTE: Model estimated with fixed effects for the clusters (not shown)."
if "`fe'" == "external" di "NOTE: Model estimated with fixed effects for variable `festruc' (not shown)."
if "`fe'" == "inside" {
	di "NOTE: Model estimated with fixed effects for the interaction of the "
	di "cluster variable with `festruc' (not shown)."
}

di "The t-statistics and 95% confidence intervals are generated from the pairs "
di "cluster bootstrap-t procedure and are robust to clustering with a small number "
di "of sampling units.  Please note that the accuracy of the t-statistics and CIs "
di "is conditional on the number of bootstrap replications that were used to " 
di "calculate the distribution of t. For p < .05 significance tests, specify reps(500)"
di " or more. For p < .01 level, specify reps(1000) or more. For p < .001 level, specify "
di "reps(5000) or more.  More iterations will also yield more accurate confidence "
di "intervals.  Post-estimation procedures should not be run on this model."

restore

end