*! version 4.0 May2023  Matteo Pinna, matteo.pinna@gess.ethz.ch

* Versions:
* version 1.1 partly fixes the display of multiple graphs, sets default values for xmin and ymin, add twoway general options to the histograms and solves some bugs in the error messages
* version 1.2 changes the command name, the default width to 100% and default color style of the bars
* version 1.3 varies the method of scatterpoints retrieving to avoid intermediate files and makes the program an rclass program, adding the possibility to ereturn the values from the reg/areg used to get residuals
* version 1.4 adds some warnings and fixes some issues in the help file
* version 2.0 adds an option to report automatically coefficient (s.e.) and/or sample size with or without p-value stars. Uses reghdfe as default, but option areg can be alternatively specified. Furthermore, allows for clustered or robust standard errors for the coefficient report, modifying therefore the sample for residualization and for estimation.
* version 2.1 fixes a compatibility problem with older STATA versions, of the new options coefficient and sample (stata <16) and transparency of colors (stata<15)
* version 2.2 fixes an issue with color compatibility and adds rounding option for coefficient reporting
* version 2.3 adds the options pvalue and ci()
* version 2.4 fixes issue with coef se and ci reporting in some cases, and adds options to adjust coefficient positioning
* version 3.0 adds binning via binsreg
* version 3.1 fixes an issue in trasmitting graph options to addplot and add info on correct format of legend
* version 3.2 missing brace added
* version 3.3 fixes bug in sample reporting for binsreg option and an issue with the legend
* version 4.0 adds the scatter option for a binscatter within a scatter, in addition to msize and boxposition options.

/*
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.  
The full legal text as well as a human-readable summary can be accessed at http://creativecommons.org/licenses/by-nc-sa/4.0/
*/

* Any feedback on issues and possible new features is very welcome.

* This program is based on the original binscatter by Michael Stepner (2013): "BINSCATTER: Stata module to generate binned scatterplots" - https://EconPapers.repec.org/RePEc:boc:bocode:s457709 and uses Ben Jann (2014): "ADDPLOT: Stata module to add twoway plot objects to an existing twoway graph," Statistical Software Components S457917, Boston College Department of Economics, revised 28 Jan 2015 <https://ideas.repec.org/c/boc/bocode/s457917.html> and the BINSREG program: Cattaneo, M. D., R. K. Crump, M. H. Farrell, and Y. Feng. 2019b.  Binscatter Regressions. arXiv:1902.09615.

cap program drop binscatterhist
program define binscatterhist, rclass sortpreserve

local stata_version=c(version)

	version 12.1
	syntax varlist(min=2 numeric) [if] [in] [aweight fweight], [by(varname) ///
		Nquantiles(integer 20) GENxq(name) discrete xq(varname numeric) MEDians ///
		CONTROLs(varlist numeric ts fv) absorb(varlist) noAddmean REGtype(string) ///
		LINEtype(string) rd(numlist ascending) reportreg ///
		COLors(string) MColors(string) LColors(string) Msymbols(string) msize(string) ///
		savegraph(string) savedata(string) replace ///
		nofastxtile randvar(varname numeric) randcut(real 1) randn(integer -1) ///
		/* OPTIONS */ nbins(integer 20) create_xq x_q(varname numeric) symbols(string) method(string) unique(string) ///
		/* scatterpoints creation */ binsreg ///
		/* standard errors */ CLUSTer(varname) vce(string) ///
		/* coefficient display */ COEFficient(string) BOXPOSition(string) sample Pvalue ci(string) stars(string) ///
		/* histogram options */ HISTogram(string) xmin(string) ymin(string) xhistbarheight(string) yhistbarheight(string) xhistbarwidth(string) yhistbarwidth(string) xhistbins(string) yhistbins(string) ///
		/* histogram esthetic options */ xcolor(string) xcfcolor(string) xfintensity(string) xlcolor(string) xlwidth(string) xlpattern(string) xlalign(string) xlstyle(string) xbstyle(string) xpstyle(string) ycolor(string) ycfcolor(string) yfintensity(string) ylcolor(string) ylwidth(string) ylpattern(string) ylalign(string) ylstyle(string) ybstyle(string) ypstyle(string) ///
		/* background scatter options*/ scatter(string) scattermsymbols(string) scattermcolor(string) scattermsize(string) ///
		/* options legacy */ name(string) legacy(string) ///
		*]
		
	set more off
	
	* Parse varlist into y-vars and x-var
	local x_var=word("`varlist'",-1)
	local y_vars=regexr("`varlist'"," `x_var'$","")
	local ynum=wordcount("`y_vars'")
	
	* Create convenient weight local
	if ("`weight'"!="") local wt [`weight'`exp']
	
	***** Begin legacy option compatibility code
	
	if (`nbins'!=20) {
		if (`nquantiles'!=20) {
			di as error "Cannot specify both nquantiles() and nbins(): both are the same option, nbins is supported only for backward compatibility."
			exit
		}
		di as text "NOTE: legacy option nbins() has been renamed nquantiles(), and is supported only for backward compatibility."
		local nquantiles=`nbins'
	}
	
	if ("`create_xq'"!="") {
		if ("`genxq'"!="") {
			di as error "Cannot specify both genxq() and create_xq: both are the same option, create_xq is supported only for backward compatibility."
			exit
		}
		di as text "NOTE: legacy option create_xq has been renamed genxq(), and is supported only for backward compatibility."
		local genxq="q_"+word("`varlist'",-1)
	}
	
	if ("`x_q'"!="") {
		if ("`xq'"!="") {
			di as error "Cannot specify both xq() and x_q(): both are the same option, x_q() is supported only for backward compatibility."
			exit
		}
		di as text "NOTE: legacy option x_q() has been renamed xq(), and is supported only for backward compatibility."
		local xq `x_q'
	}
	
	if ("`symbols'"!="") {
		if ("`msymbols'"!="") {
			di as error "Cannot specify both msymbols() and symbols(): both are the same option, symbols() is supported only for backward compatibility."
			exit
		}
		di as text "NOTE: legacy option symbols() has been renamed msymbols(), and is supported only for backward compatibility."
		local msymbols `symbols'
	}

	if ("`binsreg'"!="") & (("`by'"!="") | ("`medians'"!="") | ("`genxq'"!="") | ("`discrete'"!="") | ("`xq'"!="") | ("`nofastxtile'"!="") | ("`randvar'"!="") | ("`randcut'"!="1")  | ("`randn'"!="-1")) {
	display as error "The binsreg option cannot be used together with by, medians, genxq, discrete, xq, nofastxtile, randvar, randcut, randn"
	exit
	}

	if ("`linetype'"=="noline") {
		di as text "NOTE: legacy line type 'noline' has been renamed 'none', and is supported only for backward compatibility."
		local linetype none
	}
	
	if ("`method'"!="") {
		di as text "NOTE: method() is no longer a recognized option, and will be ignored. binscatter now always uses the fastest method without a need for two instances."
	}
	
	if ("`unique'"!="") {
		di as text "NOTE: unique() is no longer a recognized option, and will be ignored. binscatter now considers the x-variable discrete if it has fewer unique values than nquantiles()."
	}
	
	if ("`histogram'"!="") & ("`histogram'"!="`x_var' `y_vars'") & ("`histogram'"!="`y_vars' `x_var'") & ("`histogram'"!="`x_var'") & ("`histogram'"!="`y_vars'") {
	di as error "Option histogram() must include either one or both variables graphed in the binscatter."
	}

	if ("`histogram'"=="") & (("`xmin'"!="") | ("`ymin'"!="") | (("`xmin'"!="") & ("`ymin'"!=""))) {
	di as error "Options ymin() and/or xmin() are only allowed together with histogram()."
	}

	if ("`histogram'"=="") & (("`xhistbarheight'"!="") | ("`xhistbarwidth'"!="") | ("`xhistbins'"!="") | ("`yhistbarheight'"!="") | ("`yhistbarwidth'"!="") | ("`yhistbins'"!="")) {
	di as error "Options *histbarwidth() and/or *histbarheight() and/or *histbins() are only allowed together with histogram()."
	}
	
	if ("`regtype'"!="") & ("`regtype'"!="reghdfe") & ("`regtype'"!="areg") {
	di as error "Option regtype() can only be areg or reghdfe - reghdfe is the default."
	}	
	
	if ("`cluster'"!="") & ("`vce'"!="") {
	di as error "Options cluster() and vce() cannot be specified together."
	exit
	}	
	
	if ("`linetype'"=="none") & (("`coefficient'"!="") | ("`sample'"!="")) {
	di as error "Coefficient and sample size are estimated through the line fitting."
	}	
	
	if ("`vce'"!="") & ("`vce'"!="robust") {
	di as error "Use vce(robust) for robust s.e. or cluster(varname) for clustered s.e."
	}	
	
	if ("`stars'"!="") & ("`stars'"!="nostars") & ("`stars'"!="1") & ("`stars'"!="2") & ("`stars'"!="3") & ("`stars'"!="4") {
	di as error "Option stars() can only be: nostars, 1, 2, 3, 4."
	}
	
	if (("`ci'"!="")|("`pvalue'"!="")) & ("`coefficient'"==""){
	di as error "Pvalue and ci(level) options require the option COEFficient(rounding)"
	}	
	
	***** End legacy option capatibility code

	*** Perform checks

	* addplot check
	if "`histogram'"!="" {
		capt which addplot
		if _rc !=0 {
		di as error "Binscatterhist requires addplot to be installed. Addplot can be installed by typing ssc install addplot"
		exit
		}
	}

	* reghdfe check
	if ("`regtype'"=="") | ("`regtype'"=="reghdfe"){
	capt which reghdfe
		if _rc !=0 {
		di as error "Binscatterhist requires reghdfe to be installed. Reghdfe can be installed by typing ssc install reghdfe"
		exit
		}
	}
	
	if "`binsreg'"!="" {
		* binsreg check
		capt which binsreg
			if _rc !=0 {
			di as error "Binscatterhist requires binsreg to be installed to use the binsreg option. Binsreg can be installed by typing ssc install binsreg"
			exit
			}
	}
	
	* Set default linetype and check valid
	if ("`linetype'"=="") local linetype lfit
	else if !inlist("`linetype'","connect","lfit","qfit","none") {
		di as error "linetype() must either be connect, lfit, qfit, or none"
		exit
	}
	
	* Check that nofastxtile isn't combined with fastxtile-only options
	if "`fastxtile'"=="nofastxtile" & ("`randvar'"!="" | `randcut'!=1 | `randn'!=-1) {
		di as error "Cannot combine randvar, randcut or randn with nofastxtile"
		exit
	}

	* Misc checks
	if ("`genxq'"!="" & ("`xq'"!="" | "`discrete'"!="")) | ("`xq'"!="" & "`discrete'"!="") {
		di as error "Cannot specify more than one of genxq(), xq(), and discrete simultaneously."
		exit
	}
	if ("`genxq'"!="") confirm new variable `genxq'
	if ("`xq'"!="") {
		capture assert `xq'==int(`xq') & `xq'>0
		if _rc!=0 {
			di as error "xq() must contain only positive integers."
			exit
		}
		
		if ("`controls'`absorb'"!="") di as text "warning: xq() is specified in combination with controls() or absorb(). note that binning takes places after residualization, so the xq variable should contain bins of the residuals."
	}
	if `nquantiles'!=20 & ("`xq'"!="" | "`discrete'"!="") {
		di as error "Cannot specify nquantiles in combination with discrete or an xq variable."
		exit
	}
	if "`reportreg'"!="" & !inlist("`linetype'","lfit","qfit") {
		di as error "Cannot specify 'reportreg' when no fit line is being created."
		exit
	}
	if "`replace'"=="" {
		if `"`savegraph'"'!="" {
			if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") confirm new file `"`savegraph'"'
			else confirm new file `"`savegraph'.gph"'
		}
		if `"`savedata'"'!="" {
			confirm new file `"`savedata'.csv"'
			confirm new file `"`savedata'.do"'
		}
	}
	
	* Mark sample (reflects the if/in conditions, and includes only nonmissing observations) - the sample must also exclude singletons if a reghdfe is run
		if `"`absorb'"'!="" {
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			cap qui reghdfe `y_vars', absorb(`absorb')
			tempvar singleton_dropped
			cap qui gen `singleton_dropped'=1 if e(sample)
			}			
		}
	marksample touse
	markout `touse' `by' `xq' `controls' `absorb' `addvce' `addresid' `singleton_dropped', strok
	qui count if `touse'
	local samplesize=r(N)
	local touse_first=_N-`samplesize'+1
	local touse_last=_N

	* Bins adjustment based on scatter (after sample size)
	if ("`scatter'"!="") {
	local nquantiles2=`nquantiles'
		if ("`scatter'"=="N") | ("`scatter'"=="n") {
			local nquantiles=`samplesize'
		}	
		if ("`scatter'"!="N") & ("`scatter'"!="n") {
			local nquantiles=`scatter'
		}	
	}
	
	* Check number of unique byvals & create local storing byvals
	if "`by'"!="" {
		local byvarname `by'
	
		capture confirm numeric variable `by'
		if _rc {
			* by-variable is string => generate a numeric version
			tempvar by
			tempname bylabel
			egen `by'=group(`byvarname'), lname(`bylabel')
		}
		
		local bylabel `:value label `by'' /*catch value labels for numeric by-vars too*/ 
		
		tempname byvalmatrix
		qui tab `by' if `touse', nofreq matrow(`byvalmatrix')
		
		local bynum=r(r)
		forvalues i=1/`bynum' {
			local byvals `byvals' `=`byvalmatrix'[`i',1]'
		}
	}
	else local bynum=1
	
	******  Name  ******
		if ("`name'"!="") {
		local addname="name(`name')"
		}
	* Isolate name from name local, solving a conflict with addplot
	tempvar comma_pos
	gen `comma_pos'=strpos("`name'",",")
	replace `comma_pos'=`comma_pos'-1
	local name_addplot=substr("`name'",1,`comma_pos')
		
	******  Standard errors  ******
		
		if ("`cluster'"!="") {
		local addcluster="cluster(`cluster')"
		}
		if ("`vce'"!="") {
		local addvce="vce(`vce')"
		}

	******  Confidence interval  ******
		if ("`ci'"!="") {
		local addci="level(`ci')"
		}	
	
	****** Save constant if binsreg is run 
	if ("`reportreg'"=="") | (("`reportreg'"!="") & ("`binsreg'"=="")) local binsreg_verbosity "quietly"
	if ("`binsreg'"!="") `binsreg_verbosity' {
		if `"`absorb'"'!="" {
		local absorb "absorb(`absorb')"
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			local regtype "reghdfe"
			local addresid="resid"
			}
			if ("`regtype'"=="areg") {
			local regtype "areg"
			}
		}
		else {
			local regtype "reg"
		}
	
		if ("`binsreg_verbosity'"=="quietly") capture `regtype' `y_vars' `x_var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce'
			else capture noisily `regtype' `y_vars' `x_var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce'	
	mat binsreg_eb=e(b)
	mat binsreg_eV=e(V)
	mat mat_edf_r=e(df_r)
	local binsreg_edf_r=mat_edf_r[1,1]
	local binsreg_const=binsreg_eb[1,"_cons"] 	
	}
	*
	
	******  Create residuals  ******
	
	if (`"`controls'`absorb'"'!="") quietly {
	
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
		local absorb "absorb(`absorb')"
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			local regtype "reghdfe"
			local addresid="resid"
			}
			if ("`regtype'"=="areg") {
			local regtype "areg"
			}
		}
		else {
			local regtype "reg"
		}
		
		* Generate residuals
		local firstloop=1
		foreach var of varlist `x_var' `y_vars' {
			tempvar residvar 
			`regtype' `var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce' `addresid' `addci'
			predict `residvar' if e(sample), residuals
			if ("`addmean'"!="noaddmean") {
				summarize `var' `wt' if `touse', meanonly
				replace `residvar'=`residvar'+r(mean)
			}
			
			label variable `residvar' "`var'"
			if `firstloop'==1 {
				local x_r `residvar'
				local firstloop=0
			}
			else local y_vars_r `y_vars_r' `residvar'
		}
		
	}
	else { 	/*absorb and controls both empty, no need for regression*/
		local x_r `x_var'
		local y_vars_r `y_vars'
	}
	****** Regressions for fit lines ******
	if ("`reportreg'"=="") | (("`reportreg'"!="") & ("`binsreg'"!="")) local reg_verbosity "quietly"

	if inlist("`linetype'","lfit","qfit") `reg_verbosity' {

		* If doing a quadratic fit, generate a quadratic term in x
		if "`linetype'"=="qfit" {
			tempvar x_r2
			gen `x_r2'=`x_r'^2
		}
		
		* Create matrices to hold regression results
		tempname e_b_temp
		forvalues i=1/`ynum' {
			tempname y`i'_coefs
		}
		
		* LOOP over by-vars
		local counter_by=1
		if ("`by'"=="") local noby="noby"
		foreach byval in `byvals' `noby' {
		
			* LOOP over rd intervals
			tokenize  "`rd'"
			local counter_rd=1	
				
			while ("`1'"!="" | `counter_rd'==1) {
			
				* display text headers
				if "`reportreg'"!="" {
					di "{txt}{hline}"
					if ("`by'"!="") {
						if ("`bylabel'"=="") di "-> `byvarname' = `byval'"
						else {
							di "-> `byvarname' = `: label `bylabel' `byval''"
						}
					}
					if ("`rd'"!="") {
						if (`counter_rd'==1) di "RD: `x_var'<=`1'"
						else if ("`2'"!="") di "RD: `x_var'>`1' & `x_var'<=`2'"
						else di "RD: `x_var'>`1'"
					}
				}
				
				* set conditions on reg
				local conds `touse'
				
				if ("`by'"!="" ) local conds `conds' & `by'==`byval'
				
				if ("`rd'"!="") {
					if (`counter_rd'==1) local conds `conds' & `x_r'<=`1'
					else if ("`2'"!="") local conds `conds' & `x_r'>`1' & `x_r'<=`2'
					else local conds `conds' & `x_r'>`1'
				}
					
				* LOOP over y-vars
				local counter_depvar=1
				foreach depvar of varlist `y_vars_r' {
				
					* display text headers
					if (`ynum'>1) {
						if ("`controls'`absorb'"!="") local depvar_name : var label `depvar'
						else local depvar_name `depvar'
						di as text "{bf:y_var = `depvar_name'}"
					}
					
					* perform regression
					if ("`reg_verbosity'"=="quietly") capture reg `depvar' `x_r2' `x_r' `wt' if `conds', `addcluster' `addvce'
					else capture noisily reg `depvar' `x_r2' `x_r' `wt' if `conds', `addcluster' `addvce'
					* store results
					if (_rc==0) matrix e_b_temp=e(b)
					else if (_rc==2000) {
						if ("`reg_verbosity'"=="quietly") di as error "no observations for one of the fit lines. add 'reportreg' for more info."
						
						if ("`linetype'"=="lfit") matrix e_b_temp=.,.
						else /*("`linetype'"=="qfit")*/ matrix e_b_temp=.,.,.
					}
					else {
						error _rc
						exit _rc
					}
					
					* relabel matrix row			
					if ("`by'"!="") matrix roweq e_b_temp = "by`counter_by'"
					if ("`rd'"!="") matrix rownames e_b_temp = "rd`counter_rd'"
					else matrix rownames e_b_temp = "="
					
					* save to y_var matrix
					if (`counter_by'==1 & `counter_rd'==1) matrix `y`counter_depvar'_coefs'=e_b_temp
					else matrix `y`counter_depvar'_coefs'=`y`counter_depvar'_coefs' \ e_b_temp
					
					* increment depvar counter
					local ++counter_depvar
				}
			
				* increment rd counter
				if (`counter_rd'!=1) mac shift
				local ++counter_rd
				
			}
			
			* increment by counter
			local ++counter_by
			
		}
	
		* relabel matrix column names
		forvalues i=1/`ynum' {
			if ("`linetype'"=="lfit") matrix colnames `y`i'_coefs' = "`x_var'" "_cons"
			else if ("`linetype'"=="qfit") matrix colnames `y`i'_coefs' = "`x_var'^2" "`x_var'" "_cons"
		}
	
	}

	******* Define the bins *******
	* Specify and/or create the xq var, as necessary
	if "`xq'"=="" {

		if !(`touse_first'==1 & word("`:sortedby'",1)=="`x_r'") sort `touse' `x_r'
	
		if "`discrete'"=="" { /* xq() and discrete are not specified */
			
			* Check whether the number of unique values > nquantiles, or <= nquantiles
			capture mata: characterize_unique_vals_sorted("`x_r'",`touse_first',`touse_last',`nquantiles')

			if (_rc==0) { /* number of unique values <= nquantiles, set to discrete */
				local discrete discrete
				if ("`genxq'"!="") di as text `"note: the x-variable has fewer unique values than the number of bins specified (`nquantiles').  It will therefore be treated as discrete, and genxq() will be ignored"'

				local xq `x_r'
				local nquantiles=r(r)
				if ("`by'"=="") {
					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else if (_rc==134) { /* number of unique values > nquantiles, perform binning */
				if ("`genxq'"!="") local xq `genxq'
				else tempvar xq
	
				if ("`fastxtile'"!="nofastxtile") fastxtile `xq' = `x_r' `wt' in `touse_first'/`touse_last', nq(`nquantiles') randvar(`randvar') randcut(`randcut') randn(`randn')
				else xtile `xq' = `x_r' `wt' in `touse_first'/`touse_last', nq(`nquantiles')

				if ("`by'"=="") {
					mata: characterize_unique_vals_sorted("`xq'",`touse_first',`touse_last',`nquantiles')
					if (r(r)!=`nquantiles') {
						di as text "warning: nquantiles(`nquantiles') was specified, but only `r(r)' were generated. see help file under nquantiles() for explanation."
						local nquantiles=r(r)
					}

					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else {
				error _rc
			}

		}
		
		else { /* discrete is specified, xq() & genxq() are not */
		
			if ("`controls'`absorb'"!="") di as text "warning: discrete is specified in combination with controls() or absorb(). note that binning takes places after residualization, so the residualized x-variable may contain many more unique values."
			
			capture mata: characterize_unique_vals_sorted("`x_r'",`touse_first',`touse_last',`=`samplesize'/2')
		
			if (_rc==0) {
				local xq `x_r'
				local nquantiles=r(r)
				if ("`by'"=="") {
					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else if (_rc==134) {
				di as error "discrete specified, but number of unique values is > (sample size/2)"
				exit 134
			}
			else {
				error _rc
			}
		}
	}
	else {
		if !(`touse_first'==1 & word("`:sortedby'",1)=="`xq'") sort `touse' `xq'
		
		* set nquantiles & boundaries
		mata: characterize_unique_vals_sorted("`xq'",`touse_first',`touse_last',`=`samplesize'/2')

		if (_rc==0) {
			local nquantiles=r(r)
			if ("`by'"=="") {
				tempname xq_boundaries xq_values
				matrix `xq_boundaries'=r(boundaries)		
				matrix `xq_values'=r(values)
			}
		}
		else if (_rc==134) {
			di as error "discrete specified, but number of unique values is > (sample size/2)"
			exit 134
		}
		else {
			error _rc
		}
	}
	
	********** Compute scatter points **********

	if ("`by'"!="") {
		sort `touse' `by' `xq'
		tempname by_boundaries
		mata: characterize_unique_vals_sorted("`by'",`touse_first',`touse_last',`bynum')
		matrix `by_boundaries'=r(boundaries)
	}

	forvalues b=1/`bynum' {
		if ("`by'"!="") {
			mata: characterize_unique_vals_sorted("`xq'",`=`by_boundaries'[`b',1]',`=`by_boundaries'[`b',2]',`nquantiles')
			tempname xq_boundaries xq_values
			matrix `xq_boundaries'=r(boundaries)
			matrix `xq_values'=r(values)
		}
		/* otherwise xq_boundaries and xq_values are defined above in the binning code block */

		* Define x-means
		tempname xbin_means
		if ("`discrete'"=="discrete") {
			matrix `xbin_means'=`xq_values'
		}
		else {
			means_in_boundaries `x_r' `wt', bounds(`xq_boundaries') `medians'
			matrix `xbin_means'=r(means)
		}

		* LOOP over y-vars to define y-means
		local counter_depvar=0
		foreach depvar of varlist `y_vars_r' {
			local ++counter_depvar

			means_in_boundaries `depvar' `wt', bounds(`xq_boundaries') `medians'

			* store to matrix
			if (`b'==1) {
				tempname y`counter_depvar'_scatterpts
				matrix `y`counter_depvar'_scatterpts' = `xbin_means',r(means)
			}
			else {
				* make matrices conformable before right appending			
				local rowdiff=rowsof(`y`counter_depvar'_scatterpts')-rowsof(`xbin_means')
				if (`rowdiff'==0) matrix `y`counter_depvar'_scatterpts' = `y`counter_depvar'_scatterpts',`xbin_means',r(means)
				else if (`rowdiff'>0)  matrix `y`counter_depvar'_scatterpts' = `y`counter_depvar'_scatterpts', ( (`xbin_means',r(means)) \ J(`rowdiff',2,.) )
				else /*(`rowdiff'<0)*/ matrix `y`counter_depvar'_scatterpts' = ( `y`counter_depvar'_scatterpts' \ J(-`rowdiff',colsof(`y`counter_depvar'_scatterpts'),.) ) ,`xbin_means',r(means)
			}
		}
	}


	*********** With Binsreg ***********
	if "`binsreg'"=="binsreg" {	/* This saves the matrix of scatterpoints from binsreg and replaces the one from binscatterhist */	
	if "`nquantiles'"!="" local addnbins "nbins(`nquantiles')"
	tempfile binsreg_points
	qui binsreg `y_vars' `x_var' `controls' `wt' if `touse', `addnbins' savedata(`binsreg_points')
	preserve
	use `binsreg_points', clear
	mkmat dots_x dots_fit, matrix(`y1_scatterpts')
	qui ereplace dots_binid=max(dots_binid)
	local binsreg_nbins=dots_binid[1]
	restore
		if "`binsreg_nbins'"!="`nquantiles'" {
		di as error "Binsreg is selecting `binsreg_nbins' bins, update your nquantiles option or replace with `binsreg_nbins'"
		exit
		}
	}	
	*

	*********** Perform Graphing ***********

	* If rd is specified, prepare xline parameters
	if "`rd'"!="" {
		foreach xval in "`rd'" {
			local xlines `xlines' xline(`xval', lpattern(dash) lcolor(gs8))
		}
	}

	* Fill colors if missing
	if `"`colors'"'=="" local colors ///
		navy maroon forest_green dkorange teal cranberry lavender ///
		khaki sienna emidblue emerald brown erose gold bluishgray ///
		/* lime magenta cyan pink blue */
	if `"`mcolors'"'=="" {
		if (`ynum'==1 & `bynum'==1 & "`linetype'"!="connect") local mcolors `: word 1 of `colors''
		else local mcolors `colors'
	}
	if `"`lcolors'"'=="" {
		if (`ynum'==1 & `bynum'==1 & "`linetype'"!="connect") local lcolors `: word 2 of `colors''
		else local lcolors `colors'
	}
	local num_mcolor=wordcount(`"`mcolors'"')
	local num_lcolor=wordcount(`"`lcolors'"')

	* markers adjustments based on scatter - basically switch the moptions from scatter to binscatter and viceversa (plus setting the default) (this should be after the section above on graphing but before the prepare connect & msymbol)
	if ("`scatter'"!="") {
	* save the binned scatter options 
	local binnedmsymbols "`msymbols'"
	local binnedmcolors "`mcolors'"
	local binnedmsize "`msize'"

	* transfer the scattermoptions to the main scatter
	local msymbols "Oh"
	local mcolors "gray%30"
	local msize "tiny"
	if ("`scattermsymbols'"!="") local msymbols "`scattermsymbols'"
	if ("`scattermcolors'"!="") local mcolors "`scattermcolor'"
	if ("`scattermsize'"!="") local msize "`scattermsize'"
	} 
	
	* Prepare connect & msymbol options
	if ("`linetype'"=="connect") local connect "c(l)"
	if "`msymbols'"!="" {
		local symbol_prefix "msymbol("
		local symbol_suffix ")"
	}

	
	*** Prepare scatters
	
	* c indexes which color is to be used
	local c=0
	
	local counter_series=0
	
	* LOOP over by-vars
	local counter_by=0
	if ("`by'"=="") local noby="noby"
	foreach byval in `byvals' `noby' {
		local ++counter_by
		
		local xind=`counter_by'*2-1
		local yind=`counter_by'*2

		* LOOP over y-vars
		local counter_depvar=0
		foreach depvar of varlist `y_vars' {
			local ++counter_depvar
			local ++c
			
			* LOOP over rows (each row contains a coordinate pair)
			local row=1
			local xval=`y`counter_depvar'_scatterpts'[`row',`xind']
			local yval=`y`counter_depvar'_scatterpts'[`row',`yind']
			
			if !missing(`xval',`yval') {
				local ++counter_series
				local scatters `scatters' (scatteri
				if ("`savedata'"!="") {
					if ("`by'"=="") local savedata_scatters `savedata_scatters' (scatter `depvar' `x_var'
					else local savedata_scatters `savedata_scatters' (scatter `depvar'_by`counter_by' `x_var'_by`counter_by'
				}
			}
			else {
				* skip the rest of this loop iteration
				continue
			}
			
			while (`xval'!=. & `yval'!=.) {
				local scatters `scatters' `yval' `xval'
			
				local ++row
				local xval=`y`counter_depvar'_scatterpts'[`row',`xind']
				local yval=`y`counter_depvar'_scatterpts'[`row',`yind']
			}
			
			* Add options
			if ("`msize'"!="") local addmsize "msize(`msize')" /* to add msize to the scatter options in the scatters local */
			local scatter_options `connect' `addmsize' mcolor(`: word `c' of `mcolors'') lcolor(`: word `c' of `lcolors'') `symbol_prefix'`: word `c' of `msymbols''`symbol_suffix'
			local scatters `scatters', `scatter_options')
			if ("`savedata'"!="") local savedata_scatters `savedata_scatters', `scatter_options')
	
			/*
			* Add legend
			if "`by'"=="" {
				if (`ynum'==1) local legend_labels off
				else local legend_labels `legend_labels' lab(`counter_series' `depvar')
			}
			else {
				if ("`bylabel'"=="") local byvalname=`byval'
				else {
					local byvalname `: label `bylabel' `byval''
				}
			
				if (`ynum'==1) local legend_labels `legend_labels' lab(`counter_series' `byvarname'=`byvalname')
				else local legend_labels `legend_labels' lab(`counter_series' `depvar': `byvarname'=`byvalname')
			}
			if ("`by'"!="" | `ynum'>1) local order `order' `counter_series'
			*/
		}
		
	}
	
	*** Fit lines
		
	if inlist(`"`linetype'"',"lfit","qfit") {
	
		* c indexes which color is to be used
		local c=0
		
		local rdnum=wordcount("`rd'")+1
		
		tempname fitline_bounds
		if ("`rd'"=="") matrix `fitline_bounds'=.,.
		else matrix `fitline_bounds'=.,`=subinstr("`rd'"," ",",",.)',.

		* LOOP over by-vars
		local counter_by=0
		if ("`by'"=="") local noby="noby"
		foreach byval in `byvals' `noby' {
			local ++counter_by
			
			** Set the column for the x-coords in the scatterpts matrix
			local xind=`counter_by'*2-1
			
			* Set the row to start seeking from
			*     note: each time we seek a coeff, it should be from row (rd_num)(counter_by-1)+counter_rd
			local row0=( `rdnum' ) * (`counter_by' - 1)
			
			
			* LOOP over y-vars
			local counter_depvar=0
			foreach depvar of varlist `y_vars_r' {
				local ++counter_depvar
				local ++c
				
				* Find lower and upper bounds for the fit line
				matrix `fitline_bounds'[1,1]=`y`counter_depvar'_scatterpts'[1,`xind']
				local fitline_ub_rindex=`nquantiles'
				local fitline_ub=.
				while `fitline_ub'==. {
					local fitline_ub=`y`counter_depvar'_scatterpts'[`fitline_ub_rindex',`xind']
					local --fitline_ub_rindex
				}
				matrix `fitline_bounds'[1,`rdnum'+1]=`fitline_ub'
				*if "`binsreg'"!="" matrix `fitline_bounds'=[]
				* LOOP over rd intervals
				forvalues counter_rd=1/`rdnum' {
					
					if (`"`linetype'"'=="lfit") {
						local coef_quad=0
						local coef_lin=`y`counter_depvar'_coefs'[`row0'+`counter_rd',1]
						local coef_cons=`y`counter_depvar'_coefs'[`row0'+`counter_rd',2]
					}
					else if (`"`linetype'"'=="qfit") {
						local coef_quad=`y`counter_depvar'_coefs'[`row0'+`counter_rd',1]
						local coef_lin=`y`counter_depvar'_coefs'[`row0'+`counter_rd',2]
						local coef_cons=`y`counter_depvar'_coefs'[`row0'+`counter_rd',3]
					}	
					if !missing(`coef_quad',`coef_lin',`coef_cons') {
						local leftbound=`fitline_bounds'[1,`counter_rd']
						local rightbound=`fitline_bounds'[1,`counter_rd'+1]
						if ("`binsreg'"=="") local fits `fits' (function `coef_quad'*x^2+`coef_lin'*x+`coef_cons', range(`leftbound' `rightbound') lcolor(`: word `c' of `lcolors''))
						if ("`binsreg'"!="") local fits `fits' (function `coef_quad'*x^2+`coef_lin'*x+`binsreg_const', range(`leftbound' `rightbound') lcolor(`: word `c' of `lcolors''))
					}
				}
			}
		}

	}
	
	* Prepare y-axis title
	if (`ynum'==1) local ytitle `y_vars'
	else if (`ynum'==2) local ytitle : subinstr local y_vars " " " and "
	else local ytitle : subinstr local y_vars " " "; ", all

	* Save data
	if ("`savedata'"!="") {
	
		*** Save a CSV containing the scatter points
		tempname savedatafile
		file open `savedatafile' using `"`savedata'.csv"', write text `replace'
		
		* LOOP over rows
		forvalues row=0/`nquantiles' {
		
			*** Put the x-variable at the left
			* LOOP over by-vals
			forvalues counter_by=1/`bynum' {
			
				if (`row'==0) { /* write variable names */
					if "`by'"!="" local bynlabel _by`counter_by'
					file write `savedatafile' "`x_var'`bynlabel',"
				}
				else { /* write data values */
					if (`row'<=`=rowsof(`y1_scatterpts')') file write `savedatafile' (`y1_scatterpts'[`row',`counter_by'*2-1]) ","
					else file write `savedatafile' ".,"
				}
			}
			
			*** Now y-variables at the right
			
			* LOOP over y-vars
			local counter_depvar=0
			foreach depvar of varlist `y_vars' {
				local ++counter_depvar

				* LOOP over by-vals
				forvalues counter_by=1/`bynum' {
				
				
					if (`row'==0) { /* write variable names */
						if "`by'"!="" local bynlabel _by`counter_by'
						file write `savedatafile' "`depvar'`bynlabel'"
					}
					else { /* write data values */
						if (`row'<=`=rowsof(`y`counter_depvar'_scatterpts')') file write `savedatafile' (`y`counter_depvar'_scatterpts'[`row',`counter_by'*2])
						else file write `savedatafile' "."
					}
					
					* unless this is the last variable in the dataset, add a comma
					if !(`counter_depvar'==`ynum' & `counter_by'==`bynum') file write `savedatafile' ","
					
				} /* end by-val loop */
				
			} /* end y-var loop */
			
			file write `savedatafile' _n
			
		} /* end row loop */

		file close `savedatafile'
		di as text `"(file `savedata'.csv written containing saved data)"'
		
		
		
		*** Save a do-file with the commands to generate a nicely labeled dataset and re-create the binscatter graph
		
		file open `savedatafile' using `"`savedata'.do"', write text `replace'
		
		file write `savedatafile' `"insheet using `savedata'.csv"' _n _n
		
		if "`by'"!="" {
			foreach var of varlist `x_var' `y_vars' {
				local counter_by=0
				foreach byval in `byvals' {
					local ++counter_by
					if ("`bylabel'"=="") local byvalname=`byval'
					else {
						local byvalname `: label `bylabel' `byval''
					}
					file write `savedatafile' `"label variable `var'_by`counter_by' "`var'; `byvarname'==`byvalname'""' _n
				}
			}
			file write `savedatafile' _n
		}
		
		file write `savedatafile' `"`savedata_graphcmd'"' _n
		
		file close `savedatafile'
		di as text `"(file `savedata'.do written containing commands to process saved data)"'
		
	}
	
	**** Save parameters, used both for histogram and coefficient report
	* Save parameters from variables of interest
	tempname t_min_`x_var' t_max_`x_var' t_r_`x_var' t_min_`y_vars' t_max_`y_vars' t_r_`y_vars'
	cap sum `x_var' if `touse'==1
	scalar `t_min_`x_var''=r(min) 
	scalar `t_max_`x_var''=r(max) 
	scalar `t_r_`x_var''=abs(r(min)-r(max))
	cap sum `y_vars' if `touse'==1
	scalar `t_min_`y_vars''=r(min) 
	scalar `t_max_`y_vars''=r(max) 
	scalar `t_r_`y_vars''=abs(r(min)-r(max))
	
	* Save parameters from binscatter
	mata: st_numscalar("max_b_x", max(st_matrix("`y1_scatterpts'")[,1]))
	mata: st_numscalar("min_b_x", min(st_matrix("`y1_scatterpts'")[,1]))
	mata: st_numscalar("max_b_y", max(st_matrix("`y1_scatterpts'")[,2]))
	mata: st_numscalar("min_b_y", min(st_matrix("`y1_scatterpts'")[,2]))
		
	tempname t_minb_`x_var' t_maxb_`x_var' t_rb_`x_var' t_minb_`y_vars' t_maxb_`y_vars' t_rb_`y_vars'
	scalar `t_minb_`x_var''=min_b_x
	scalar `t_maxb_`x_var''=max_b_x
	scalar `t_rb_`x_var''=abs(`t_minb_`x_var''-`t_maxb_`x_var'')
	cap sum `y_vars'
	scalar `t_minb_`y_vars''=min_b_y 
	scalar `t_maxb_`y_vars''=max_b_y
	scalar `t_rb_`y_vars''=abs(`t_minb_`y_vars''-`t_maxb_`y_vars'')
		
		
	****** Add histogram ******
	if ("`histogram'"!="") {
	
	* Default positioning for graphs
	if ("`xmin'"=="") {
	local xmin=`t_minb_`x_var''-(1/8)*`t_rb_`x_var''+(1/100)*`t_rb_`x_var''	 
	}		
	if ("`ymin'"=="") {
	local ymin=`t_minb_`y_vars''-(1/8)*`t_rb_`y_vars''+(1/100)*`t_rb_`y_vars''	 
	}		

	* default options for histograms
	* formerly xhistbarheight(real 10) yhistbarheight(real 10) xhistbarwidth(real 50) yhistbarwidth(real 50) xhistbins(real 20) yhistbins(real 20)
	if ("`xhistbarheight'"=="") {
	local xhistbarheight=10
	}	
	if ("`yhistbarheight'"=="") {
	local yhistbarheight=10
	}	
	if ("`xhistbarwidth'"=="") {
	local xhistbarwidth=100
	}	
	if ("`yhistbarwidth'"=="") {
	local yhistbarwidth=100
	}	
	if ("`xhistbins'"=="") {
	local xhistbins=20
	}	
	if ("`yhistbins'"=="") {
	local yhistbins=20
	}	
	* depending on which stata installed
	if `stata_version'>=15 {
		if ("`xcolor'"=="") {
		local xcolor_default="color(teal%50)"
		}
		if ("`ycolor'"=="") {
		local ycolor_default="color(maroon%50)"
		}
	}
	if `stata_version'<15 {
		if ("`xcolor'"=="") {
		local xcolor_default="color(teal)"
		}
		if ("`ycolor'"=="") {
		local ycolor_default="color(maroon)"
		}
	}
	* adjustment in case no option specified (color not included as a default)
	if ("`xcolor'"!="") {
	local xcolor="color(`xcolor')"
	}
	if ("`xcfcolor'"!="") {
	local xcfcolor="cfcolor(`xcfcolor')"
	local xcolor_default=""
	}
	if ("`xfintensity'"!="") {
	local xfintensity="fintensity(`xfintensity')"
	local xcolor_default=""
	}
	if ("`xlcolor'"!="") {
	local xlcolor="lcolor(`xlcolor')"
	local xcolor_default=""
	}
	if ("`xlwidth'"!="") {
	local xlwidth="lwidth(`xlwidth')"
	local xcolor_default=""
	}
	if ("`xlpattern'"!="") {
	local xlpattern="lpattern(`xlpattern')"
	local xcolor_default=""
	}
	if ("`xlalign'"!="") {
	local xlalign="lalign(`xlalign')"
	local xcolor_default=""
	}
	if ("`xlstyle'"!="") {
	local xlstyle="lstyle(`xlstyle')"
	local xcolor_default=""
	}
	if ("`xpstyle'"!="") {
	local xpstyle="pstyle(`xpstyle')"
	local xcolor_default=""
	}
	if ("`xbstyle'"!="") {
	local xbstyle="bstyle(`xbstyle')"
	local xcolor_default=""
	}     
 	if ("`ycolor'"!="") {
	local ycolor="color(`ycolor')"
	}
	if ("`ycfcolor'"!="") {
	local ycfcolor="cfcolor(`ycfcolor')"
	local ycolor_default=""
	}
	if ("`yfintensity'"!="") {
	local yfintensity="fintensity(`yfintensity')"
	local ycolor_default=""
	}
	if ("`ylcolor'"!="") {
	local ylcolor="lcolor(`ylcolor')"
	local ycolor_default=""
	}
	if ("`ylwidth'"!="") {
	local ylwidth="lwidth(`ylwidth')"
	local ycolor_default=""
	}
	if ("`ylpattern'"!="") {
	local ylpattern="lpattern(`ylpattern')"
	local ycolor_default=""
	}
	if ("`ylalign'"!="") {
	local ylalign="lalign(`ylalign')"
	local ycolor_default=""
	}
	if ("`ylstyle'"!="") {
	local ylstyle="lstyle(`ylstyle')"
	local ycolor_default=""
	}
	if ("`ypstyle'"!="") {
	local ypstyle="pstyle(`ypstyle')"
	local ycolor_default=""
	}      
	if ("`ybstyle'"!="") {
	local ybstyle="bstyle(`ybstyle')"
	local ycolor_default=""
	}     
	
	* Generate histogram of vars, within the binscatter graph boundaries
	tempname bin_`x_var' bin_`y_vars'
	scalar `bin_`x_var''=round(`t_r_`x_var''/`t_rb_`x_var''*`xhistbins') /* we need to set a number of bins so that after cutting the edges to fit in the interval of the binscatter, 20 or the desired number of bins are left, so we have bin(z) where z equal to the range of the variable time * desired amount of bins / range of binscatter */
	scalar `bin_`y_vars''=round(`t_r_`y_vars''/`t_rb_`y_vars''*`yhistbins') /* we need to set a number of bins so that after cutting the edges to fit in the interval of the binscatter, 20 or the desired number of bins are left, so we have bin(z) where z equal to the range of the variable time * desired amount of bins / range of binscatter */
	tempvar t_d_`x_var' t_b_`x_var' t_d_`y_vars' t_b_`y_vars'
	local temporanea1=`bin_`x_var''
	local temporanea2=`bin_`y_vars''
	cap twoway__histogram_gen `x_var' if `touse'==1 ,  bin(`temporanea1') gen(`t_d_`x_var'' `t_b_`x_var'') /* save density and bins number and width as two columns */
	cap twoway__histogram_gen `y_vars' if `touse'==1,  bin(`temporanea2') gen(`t_d_`y_vars'' `t_b_`y_vars'') /* save density and bins number and width as two columns */
	macro drop temporanea1 temporanea2
	
	* Save parameters from histogram	
	tempname t_minh_`x_var' t_maxh_`x_var' t_rh_`x_var' t_minh_`y_vars' t_maxh_`y_vars' t_rh_`y_vars'
	cap sum `t_d_`x_var''
	scalar `t_minh_`x_var''=r(min) 
	scalar `t_maxh_`x_var''=r(max) 
	scalar `t_rh_`x_var''=abs(r(min)-r(max))
	cap sum `t_d_`y_vars''
	scalar `t_minh_`y_vars''=r(min) 
	scalar `t_maxh_`y_vars''=r(max) 
	scalar `t_rh_`y_vars''=abs(r(min)-r(max))	
	
	* Rescale the histogram	
	tempname rc_`x_var' rc_`y_vars'
	scalar `rc_`x_var''=(`t_maxh_`x_var''*100)/(`xhistbarheight'*`t_rb_`y_vars'') /* we want the maximum value of the distribution divided by some value so that the result is 1/10, or desired quantity, this part will ensure that the distribution occupies only 1/10 of the height of the graph. */
	scalar `rc_`y_vars''=(`t_maxh_`y_vars''*100)/(`yhistbarheight'*`t_rb_`x_var'') /* we want the maximum value of the distribution divided by some value so that the result is 1/10, or desired quantity, this part will ensure that the distribution occupies only 1/10 of the height of the graph. */
	cap replace `t_d_`x_var''=(`t_d_`x_var''/`rc_`x_var'')+(`ymin') /* The height will be e.g. 10% of the graph roughly, the sum of the min is so that the 10% starts at the bottom of the axis */
	cap replace `t_d_`y_vars''=(`t_d_`y_vars''/`rc_`y_vars'')+(`xmin') /* The height will be e.g. 10% of the graph roughly, the sum of the min is so that the 10% starts at the bottom of the axis */
	cap replace `t_b_`x_var''=. if `t_b_`x_var''<`t_minb_`x_var'' | `t_b_`x_var''>`t_maxb_`x_var'' /* don't plot points outisderange from binscatter */
	cap replace `t_b_`y_vars''=. if `t_b_`y_vars''<`t_minb_`y_vars'' | `t_b_`y_vars''>`t_maxb_`y_vars'' /* don't plot points outisderange from binscatter */	
	
	* Adjust width of bars in percentage: e.g. 100% they touch each other
	tempname barw_`x_var' barw_`y_vars'
	scalar `barw_`x_var''=(`t_rb_`x_var''/`xhistbins')*(`xhistbarwidth'/100)
	scalar `barw_`y_vars''=(`t_rb_`y_vars''/`xhistbins')*(`yhistbarwidth'/100)

	}
	
	****** Coefficient, s.e., p-value and sample report ******	
	if "`coefficient'"!="" | "`sample'"!="" {
		
		* P-value and CI
		if "`ci'"!=""{
		local ci2=(`ci'+((1-`ci'/100)/2)*100)/100
		}
			if "`binsreg'"==""{
			mat eb=e(b)
			mat eV=e(V)
			mat mat_edf_r=e(df_r)
			local edf_r=mat_edf_r[1,1]
			}
			if "`binsreg'"!=""{
			mat eb=binsreg_eb
			mat eV=binsreg_eV
			local edf_r=`binsreg_edf_r'
			}		
		local tstatistic = eb[1,1]/sqrt(eV[1,1])
		local pval=2*ttail((`edf_r'),abs(`tstatistic'))  /* 		local pval=2*ttail(e(df_r),abs(`tstatistic')) */
		if "`ci'"!=""{
		local ci2=(`ci'+((1-`ci'/100)/2)*100)/100
		local lci=eb[1,1]-invnormal(`ci2')*sqrt(eV[1,1])
		local uci=eb[1,1]+invnormal(`ci2')*sqrt(eV[1,1])
		}
			if "`stars'"=="nostars" | "`stars'"=="nostar"{
			local addstars=""
			}	
			if "`stars'"=="1"|"`stars'"==""{
				if `pval'<=0.05 & `pval'>0.01{
				local addstars="*"
				}
				if `pval'<=0.01{
				local addstars="**"
				}
			}
			if "`stars'"=="2"{
				if `pval'<=0.10 & `pval'>0.05{
				local addstars="+"
				}
				if `pval'<=0.05 & `pval'>0.01{
				local addstars="*"
				}
				if `pval'<=0.01{
				local addstars="**"
				}
			}	
			if "`stars'"=="3"{
				if `pval'<=0.10 & `pval'>0.05{
				local addstars="+"
				}
				if `pval'<=0.05 & `pval'>0.01{
				local addstars="*"
				}
				if `pval'<=0.01 & `pval'>0.001{
				local addstars="**"
				}
				if `pval'<=0.001{
				local addstars="***"
				}
			}
			if "`stars'"=="4"{
				if `pval'<=0.05 & `pval'>0.01{
				local addstars="*"
				}
				if `pval'<=0.01 & `pval'>0.001{
				local addstars="**"
				}
				if `pval'<=0.001{
				local addstars="***"
				}
			}	
			
		* Coef and sample (and ci rounding)
		if ("`coefficient'"=="") local rounding=0.01
		if ("`coefficient'"!="") local rounding=`coefficient'
		local uci=round(`uci',`rounding')
		local uci: display `uci'
		local lci=round(`lci',`rounding')
		local lci: display `lci'
			if eb[1,1]>0 {
			local xmin_coef=`t_maxb_`x_var'' -(1/6)*`t_rb_`x_var''
			local ymin_coef=`t_minb_`y_vars''+(1/16)*`t_rb_`y_vars''
			tempvar nsize
			local beta=round(eb[1,1],`rounding')
			local beta: display `beta'
			*local pval=round(`pval',`rounding')
			*local pval: display `pval'
			local standerr=round(sqrt(eV[1,1]),`rounding')
			local standerr: display `standerr'
				if "`binsreg'"==""{
				cap egen `nsize'=total(e(sample))
				local sampsize=`nsize'[1]
				}
				else {
				local sampsize=e(N)
				}
			}
			if eb[1,1]<0 {
			local xmin_coef=`t_minb_`x_var'' +(1/6)*`t_rb_`x_var''
			local ymin_coef=`t_minb_`y_vars''+(1/16)*`t_rb_`y_vars''
			tempvar nsize
			local beta=round(eb[1,1],`rounding')
			local beta: display `beta'
			*local pval=round(`pval',`rounding')
			*local pval: display `pval'
			local standerr=round(sqrt(eV[1,1]),`rounding')
			local standerr: display `standerr'
				if "`binsreg'"==""{
				cap egen `nsize'=total(e(sample))
				local sampsize=`nsize'[1]
				}
				else {
				local sampsize=e(N)
				}		
			}
			if "`boxposition'"!="" {
			local counter_box=0
				foreach value in `boxposition'{
				local counter_box=`counter_box'+1
					if (`counter_box'==1) local xmin_coef=`value'
					if (`counter_box'==2) local ymin_coef=`value'
				}
			}
			
		* Label
		local sign="="
		if `pval'>=0.0001{
		local pval=round(`pval',0.0001)
		}
		if `pval'<0.0001{
		local pval=".0001"
		local sign="<"
		}
		if "`coefficient'"!="" & "`sample'"=="sample" & "`pvalue'"=="" & "`ci'"==""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "N = `sampsize'", size(3) box fcolor(none) margin(vsmall))"'
		}	
		if "`coefficient'"!="" & "`sample'"=="sample" & "`pvalue'"=="" & "`ci'"!=""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "`ci'% CI = [`lci' , `uci']" "N = `sampsize'", size(3) box fcolor(none) margin(vsmall))"'
		}	
		if "`coefficient'"!="" & "`sample'"=="sample" & "`pvalue'"=="pvalue" & "`ci'"==""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "Pvalue `sign' `pval'" "N = `sampsize'", size(3) box fcolor(none) margin(vsmall))"'
		}			
		if "`coefficient'"!="" & "`sample'"=="sample" & "`pvalue'"=="pvalue" & "`ci'"!=""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "`ci'% CI = [`lci' , `uci']" "Pvalue `sign' `pval'" "N = `sampsize'", size(3) box fcolor(none) margin(vsmall))"'
		}			
		*
		if "`coefficient'"!="" & "`sample'"=="" & "`pvalue'"=="" & "`ci'"==""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')", size(3) box fcolor(none) margin(vsmall))"'
		}
		if "`coefficient'"!="" & "`sample'"=="" & "`pvalue'"=="" & "`ci'"!=""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "`ci'% CI = [`lci' , `uci']", size(3) box fcolor(none) margin(vsmall))"'
		}		
		if "`coefficient'"!="" & "`sample'"=="" & "`pvalue'"=="pvalue" & "`ci'"==""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "Pvalue `sign' `pval'", size(3) box fcolor(none) margin(vsmall))"'
		}		
		if "`coefficient'"!="" & "`sample'"=="" & "`pvalue'"=="pvalue" & "`ci'"!=""{
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "Coef = `beta'`addstars'  (`standerr')" "`ci'% CI = [`lci' , `uci']" "Pvalue `sign' `pval'", size(3) box fcolor(none) margin(vsmall))"'
		}
		*
		if "`coefficient'"=="" & "`sample'"=="sample" {
		local coefficient_report=`"text(`ymin_coef' `xmin_coef' "N = `sampsize'", size(3) box fcolor(none) margin(vsmall))"'
		}
	}
	*

	* Legend
	local check_legend=strpos("`options'","legend")
	if "`check_legend'"=="0" local legendoff="legend(off)"
	* Display the graph
	if ("`histogram'"!="") {
	local graphcmd twoway `scatters' `fits', graphregion(fcolor(white)) `xlines' xtitle(`x_var') ytitle(`ytitle') `addname' `options' `coefficient_report' nodraw `legendoff' `options_scatter' `legacy' 
	if ("`savedata'"!="") local savedata_graphcmd twoway `savedata_scatters' `fits', graphregion(fcolor(white)) `xlines' xtitle(`x_var') ytitle(`ytitle' `addname' `options' `coefficient_report' 
	}
	if ("`histogram'"=="") {
	local graphcmd twoway `scatters' `fits', graphregion(fcolor(white)) `xlines' xtitle(`x_var') ytitle(`ytitle') legend(`legend_labels' order(`order')) `addname' `options' `coefficient_report' `legendoff' `options_scatter' `legacy'
	if ("`savedata'"!="") local savedata_graphcmd twoway `savedata_scatters' `fits', graphregion(fcolor(white)) `xlines' xtitle(`x_var') ytitle(`ytitle') legend(`legend_labels' order(`order')) `addname' `options' `coefficient_report'
	}
	`graphcmd'
	if ("`histogram'"!="") {
	local temp_barwidth1=`barw_`x_var''
	local temp_barwidth2=`barw_`y_vars''
		if ("`histogram'"=="`x_var'") {
		addplot `name_addplot': bar `t_d_`x_var'' `t_b_`x_var'', barwidth(`temp_barwidth1') base(`ymin') /* general options */ `xcolor' `xcfcolor' `xfintensity' `xlcolor' `xlwidth' `xlpattern' `xlalign' `xlstyle' `xbstyle' `xpstyle' `xbstyle' `xcolor_default'/* add histogram */ `options' `legendoff'
		}
		if ("`histogram'"=="`y_vars'") {
		addplot `name_addplot': bar `t_d_`y_vars'' `t_b_`y_vars'', horizontal base(`xmin') barwidth(`temp_barwidth2') /* general options */ `ycolor' `ycfcolor' `yfintensity' `ylcolor' `ylwidth' `ylpattern' `ylalign' `ylstyle' `ybstyle' `ypstyle' `ybstyle' `ycolor_default'/* add histogram */ `options' `legendoff'
		}
		if ("`histogram'"=="`x_var' `y_vars'") | ("`histogram'"=="`y_vars' `x_var'") {
		qui addplot `name_addplot': bar `t_d_`x_var'' `t_b_`x_var'', barwidth(`temp_barwidth1') base(`ymin') /* general options */ `xcolor' `xcfcolor' `xfintensity' `xlcolor' `xlwidth' `xlpattern' `xlalign' `xlstyle' `xbstyle' `xpstyle' `xbstyle' `xcolor_default'/* add histogram */ `options' 
		addplot `name_addplot': bar `t_d_`y_vars'' `t_b_`y_vars'', horizontal base(`xmin') barwidth(`temp_barwidth2') /* general options */ `ycolor' `ycfcolor' `yfintensity' `ylcolor' `ylwidth' `ylpattern' `ylalign' `ylstyle' `ybstyle' `ypstyle' `ybstyle' `ycolor_default'/* add histogram */ `options' `legendoff'
		}
	}
	if ("`scatter'"!="") {
		if ("`if'"!="") local add2if "[`if']"
		if ("`in'"!="") local add2in "[`in']"
		if ("`aweight'"!="") local add2aweight "[aw=`aweight']"
		if ("`fweight'"!="") local add2fweight "[fw=`fweight']"
		if ("`by'"!="") local add2by "by(`by')"
		local add2nquantiles "Nquantiles(`nquantiles2')"
		if ("`genxq'"!="") local add2genxq "GENxq(`genxq')"
		if ("`controls'"!="") local add2controls "CONTROLs(`controls')"
		if ("`absorb'"!="") local add2absorb "`absorb'"
		if ("`regtype'"!="") local add2regtype "REGtype(`regtype')"
		if ("`linetype'"!="") local add2linetype "LINEtype(`linetype')"
		if ("`rd'"!="") local add2rd "rd(`rd')"
		if ("`binnedmcolors'"!="") local add2mcolors "mcolors(`binnedmcolors')" /* binned mcolor */
		if ("`binnedmcolors'"=="") local add2mcolors "mcolors(navy)" /* binned mcolor */
		if ("`binnedmsymbols'"!="") local add2msymbols "msymbols(`binnedmsymbols')" /* binned msymbol */
		if ("`binnedmsymbols'"=="") local add2msymbols "msymbols(Oh)" /* binned msymbol */
		if ("`binnedmsize'"!="") local add2msize "msize(`binnedmsize')" /* binned msize */
		if ("`binnedmsize'"=="") local add2msize "msize(small)" /* binned msize */
		if ("`randvar'"!="") local add2randvar "randvar(`randvar')"
		if ("`randcut'"!="") local add2randcut "randcut(`randcut')"
		if ("`randn'"!="") local add2randn "randn(`randn')"
		if ("`nbins'"!="") local add2nbins "nbins(`nbins')"
		if ("`method'"!="") local add2method "method(`method')"
		if ("`unique'"!="") local add2unique "unique(`unique')"
		if ("`name'"!="") local add2name "name(`name')"
		if ("`legacy'"!="") local add2legacy "legacy(`legacy')"
		
		
		macro drop xq discrete
		addplot_scatter `varlist' `add2if' `add2in' `add2aweight' `add2fweight', `add2by' `add2nquantiles' `add2genxq' `add2controls' `add2absorb' `add2regtype' `add2linetype' `add2rd' `add2randvar' `add2randcut' `add2randn' `add2nbins' `add2x_q' `add2method' `add2unique' `add2name' `add2legacy' `binsreg' `nofastxtile' `medians' `noaddmean' `create_xq' `add2mcolors' `add2msize' `add2msymbols'
		}

	* Save graph
	if `"`savegraph'"'!="" {
		* check file extension using a regular expression
		if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0)
		
		if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace'
		else graph export `"`savegraph'"', `replace'
	}
	
	*** Return items	
	return scalar N = `samplesize'
	
	return local graphcmd `"`graphcmd'"'
	if inlist("`linetype'","lfit","qfit") {
		forvalues yi=`ynum'(-1)1 {
			return matrix y`yi'_coefs=`y`yi'_coefs'
		}
	}
	
	if ("`rd'"!="") {
		tempname rdintervals
		matrix `rdintervals' = (. \ `=subinstr("`rd'"," ","\",.)' ) , ( `=subinstr("`rd'"," ","\",.)' \ .)

		forvalues i=1/`=rowsof(`rdintervals')' {
			local rdintervals_labels `rdintervals_labels' rd`i'
		}
		matrix rownames `rdintervals' = `rdintervals_labels'
		matrix colnames `rdintervals' = gt lt_eq
		return matrix rdintervals=`rdintervals'
	}
	
	if ("`by'"!="" & "`by'"=="`byvarname'") { /* if a numeric by-variable was specified */
		forvalues i=1/`=rowsof(`byvalmatrix')' {
			local byvalmatrix_labels `byvalmatrix_labels' by`i'
		}
		matrix rownames `byvalmatrix' = `byvalmatrix_labels'
		matrix colnames `byvalmatrix' = `by'
		return matrix byvalues=`byvalmatrix'
	}
	
	
end


**********************************

* Helper programs
program define means_in_boundaries, rclass
	version 12.1

	syntax varname(numeric) [aweight fweight], BOUNDsmat(name) [MEDians]
	
	* Create convenient weight local
	if ("`weight'"!="") local wt [`weight'`exp']
	
	local r=rowsof(`boundsmat')
	matrix means=J(`r',1,.)
	
	if ("`medians'"!="medians") {
		forvalues i=1/`r' {
			sum `varlist' in `=`boundsmat'[`i',1]'/`=`boundsmat'[`i',2]' `wt', meanonly
			matrix means[`i',1]=r(mean)
		}
	}
	else {
		forvalues i=1/`r' {
			_pctile `varlist' in `=`boundsmat'[`i',1]'/`=`boundsmat'[`i',2]' `wt', percentiles(50)
			matrix means[`i',1]=r(r1)
		}
	}
	
	return clear
	return matrix means=means

end

*** copy of: version 1.21  8oct2013  Michael Stepner, stepner@mit.edu
program define fastxtile, rclass
	version 11

	* Parse weights, if any
	_parsewt "aweight fweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */

	* Extract parameters
	syntax newvarname=/exp [if] [in] [,Nquantiles(integer 2) Cutpoints(varname numeric) ALTdef ///
		CUTValues(numlist ascending) randvar(varname numeric) randcut(real 1) randn(integer -1)]

	* Mark observations which will be placed in quantiles
	marksample touse, novarlist
	markout `touse' `exp'
	qui count if `touse'
	local popsize=r(N)

	if "`cutpoints'"=="" & "`cutvalues'"=="" { /***** NQUANTILES *****/
		if `"`wt'"'!="" & "`altdef'"!="" {
			di as error "altdef option cannot be used with weights"
			exit 198
		}
		
		if `randn'!=-1 {
			if `randcut'!=1 {
				di as error "cannot specify both randcut() and randn()"
				exit 198
			}
			else if `randn'<1 {
				di as error "randn() must be a positive integer"
				exit 198
			}
			else if `randn'>`popsize' {
				di as text "randn() is larger than the population. using the full population."
				local randvar=""
			}
			else {
				local randcut=`randn'/`popsize'
				
				if "`randvar'"!="" {
					qui sum `randvar', meanonly
					if r(min)<0 | r(max)>1 {
						di as error "with randn(), the randvar specified must be in [0,1] and ought to be uniformly distributed"
						exit 198
					}
				}
			}
		}

		* Check if need to gen a temporary uniform random var
		if "`randvar'"=="" {
			if (`randcut'<1 & `randcut'>0) { 
				tempvar randvar
				gen `randvar'=runiform()
			}
			* randcut sanity check
			else if `randcut'!=1 {
				di as error "if randcut() is specified without randvar(), a uniform r.v. will be generated and randcut() must be in (0,1)"
				exit 198
			}
		}

		* Mark observations used to calculate quantile boundaries
		if ("`randvar'"!="") {
			tempvar randsample
			mark `randsample' `wt' if `touse' & `randvar'<=`randcut'
		}
		else {
			local randsample `touse'
		}

		* Error checks
		qui count if `randsample'
		local samplesize=r(N)
		if (`nquantiles' > r(N) + 1) {
			if ("`randvar'"=="") di as error "nquantiles() must be less than or equal to the number of observations [`r(N)'] plus one"
			else di as error "nquantiles() must be less than or equal to the number of sampled observations [`r(N)'] plus one"
			exit 198
		}
		else if (`nquantiles' < 2) {
			di as error "nquantiles() must be greater than or equal to 2"
			exit 198
		}

		* Compute quantile boundaries
		_pctile `exp' if `randsample' `wt', nq(`nquantiles') `altdef'

		* Store quantile boundaries in list
		forvalues i=1/`=`nquantiles'-1' {
			local cutvallist `cutvallist' r(r`i')
		}
	}
	else if "`cutpoints'"!="" { /***** CUTPOINTS *****/
	
		* Parameter checks
		if "`cutvalues'"!="" {
			di as error "cannot specify both cutpoints() and cutvalues()"
			exit 198
		}		
		if "`wt'"!="" | "`randvar'"!="" | "`ALTdef'"!="" | `randcut'!=1 | `nquantiles'!=2 | `randn'!=-1 {
			di as error "cutpoints() cannot be used with nquantiles(), altdef, randvar(), randcut(), randn() or weights"
			exit 198
		}

		tempname cutvals
		qui tab `cutpoints', matrow(`cutvals')
		
		if r(r)==0 {
			di as error "cutpoints() all missing"
			exit 2000
		}
		else {
			local nquantiles = r(r) + 1
			
			forvalues i=1/`r(r)' {
				local cutvallist `cutvallist' `cutvals'[`i',1]
			}
		}
	}
	else { /***** CUTVALUES *****/
		if "`wt'"!="" | "`randvar'"!="" | "`ALTdef'"!="" | `randcut'!=1 | `nquantiles'!=2 | `randn'!=-1 {
			di as error "cutvalues() cannot be used with nquantiles(), altdef, randvar(), randcut(), randn() or weights"
			exit 198
		}
		
		* parse numlist
		numlist "`cutvalues'"
		local cutvallist `"`r(numlist)'"'
		local nquantiles=wordcount(`"`r(numlist)'"')+1
	}

	* Pick data type for quantile variable
	if (`nquantiles'<=100) local qtype byte
	else if (`nquantiles'<=32,740) local qtype int
	else local qtype long

	* Create quantile variable
	local cutvalcommalist : subinstr local cutvallist " " ",", all
	qui gen `qtype' `varlist'=1+irecode(`exp',`cutvalcommalist') if `touse'
	label var `varlist' "`nquantiles' quantiles of `exp'"
	
	* Return values
	if ("`samplesize'"!="") return scalar n = `samplesize'
	else return scalar n = .
	
	return scalar N = `popsize'
	
	tokenize `"`cutvallist'"'
	forvalues i=`=`nquantiles'-1'(-1)1 {
		return scalar r`i' = ``i''
	}

end


version 12.1
set matastrict on

mata:

void characterize_unique_vals_sorted(string scalar var, real scalar first, real scalar last, real scalar maxuq) {
	// Inputs: a numeric variable, a starting & ending obs #, and a maximum number of unique values
	// Requires: the data to be sorted on the specified variable within the observation boundaries given
	//				(no check is made that this requirement is satisfied)
	// Returns: the number of unique values found
	//			the unique values found
	//			the observation boundaries of each unique value in the dataset
	
	
	// initialize returned results
	real scalar Nunique
	Nunique=0

	real matrix values
	values=J(maxuq,1,.)
	
	real matrix boundaries
	boundaries=J(maxuq,2,.)

	// initialize computations
	real scalar var_index
	var_index=st_varindex(var)
	
	real scalar curvalue
	real scalar prevvalue
	
	// perform computations
	real scalar obs
	for (obs=first; obs<=last; obs++) {
		curvalue=_st_data(obs,var_index)
		
		if (curvalue!=prevvalue) {
			Nunique++
			if (Nunique<=maxuq) {
				prevvalue=curvalue
				values[Nunique,1]=curvalue
				boundaries[Nunique,1]=obs
				if (Nunique>1) boundaries[Nunique-1,2]=obs-1
			}
			else {
				exit(error(134))
			}
			
		}
	}
	boundaries[Nunique,2]=last
	
	// return results
	stata("return clear")
	
	st_numscalar("r(r)",Nunique)
	st_matrix("r(values)",values[1..Nunique,.])
	st_matrix("r(boundaries)",boundaries[1..Nunique,.])

}

end

***** Helper (recursive) binscatter
cap program drop addplot_scatter
program define addplot_scatter, rclass sortpreserve

local stata_version=c(version)

	version 12.1
	syntax varlist(min=2 numeric) [if] [in] [aweight fweight], [by(varname) ///
		Nquantiles(integer 20) GENxq(name) discrete xq(varname numeric) MEDians ///
		CONTROLs(varlist numeric ts fv) absorb(varlist) noAddmean REGtype(string) ///
		LINEtype(string) rd(numlist ascending) reportreg ///
		COLors(string) MColors(string) LColors(string) Msymbols(string) msize(string) ///
		savegraph(string) savedata(string) replace ///
		nofastxtile randvar(varname numeric) randcut(real 1) randn(integer -1) ///
		/* OPTIONS */ nbins(integer 20) create_xq x_q(varname numeric) symbols(string) method(string) unique(string) ///
		/* scatterpoints creation */ binsreg ///
		/* standard errors */ CLUSTer(varname) vce(string) ///
		/* coefficient display */ COEFficient(string) xcoef(string) ycoef(string) sample Pvalue ci(string) stars(string) ///
		/* histogram options */ HISTogram(string) XMin(string) YMin(string) xhistbarheight(string) yhistbarheight(string) xhistbarwidth(string) yhistbarwidth(string) xhistbins(string) yhistbins(string) ///
		/* histogram esthetic options */ xcolor(string) xcfcolor(string) xfintensity(string) xlcolor(string) xlwidth(string) xlpattern(string) xlalign(string) xlstyle(string) xbstyle(string) xpstyle(string) ycolor(string) ycfcolor(string) yfintensity(string) ylcolor(string) ylwidth(string) ylpattern(string) ylalign(string) ylstyle(string) ybstyle(string) ypstyle(string) ///
		/* background scatter options*/   ///
		/* options legacy */ name(string) legacy(string) ///
		*]
	set more off
	* Parse varlist into y-vars and x-var
	local x_var=word("`varlist'",-1)
	local y_vars=regexr("`varlist'"," `x_var'$","")
	local ynum=wordcount("`y_vars'")
	
	* Create convenient weight local
	if ("`weight'"!="") local wt [`weight'`exp']
	
	* Mark sample (reflects the if/in conditions, and includes only nonmissing observations) - the sample must also exclude singletons if a reghdfe is run
		if `"`absorb'"'!="" {
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			cap qui reghdfe `y_vars', absorb(`absorb')
			tempvar singleton_dropped
			cap qui gen `singleton_dropped'=1 if e(sample)
			}			
		}
	marksample touse
	markout `touse' `by' `xq' `controls' `absorb' `addvce' `addresid' `singleton_dropped', strok
	qui count if `touse'
	local samplesize=r(N)
	local touse_first=_N-`samplesize'+1
	local touse_last=_N
	* Check number of unique byvals & create local storing byvals
	if "`by'"!="" {
		local byvarname `by'
	
		capture confirm numeric variable `by'
		if _rc {
			* by-variable is string => generate a numeric version
			tempvar by
			tempname bylabel
			egen `by'=group(`byvarname'), lname(`bylabel')
		}
		
		local bylabel `:value label `by'' /*catch value labels for numeric by-vars too*/ 
		
		tempname byvalmatrix
		qui tab `by' if `touse', nofreq matrow(`byvalmatrix')
		
		local bynum=r(r)
		forvalues i=1/`bynum' {
			local byvals `byvals' `=`byvalmatrix'[`i',1]'
		}
	}
	else local bynum=1

	******  Name  ******
		if ("`name'"!="") {
		local addname="name(`name')"
		}
	* Isolate name from name local, solving a conflict with addplot
	tempvar comma_pos
	gen `comma_pos'=strpos("`name'",",")
	replace `comma_pos'=`comma_pos'-1
	local name_addplot=substr("`name'",1,`comma_pos')
		
	******  Standard errors  ******
		
		if ("`cluster'"!="") {
		local addcluster="cluster(`cluster')"
		}
		if ("`vce'"!="") {
		local addvce="vce(`vce')"
		}

	******  Confidence interval  ******
		if ("`ci'"!="") {
		local addci="level(`ci')"
		}	
	
	****** Save constant if binsreg is run 
	if ("`reportreg'"=="") | (("`reportreg'"!="") & ("`binsreg'"=="")) local binsreg_verbosity "quietly"
	if ("`binsreg'"!="") `binsreg_verbosity' {
		if `"`absorb'"'!="" {
		local absorb "absorb(`absorb')"
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			local regtype "reghdfe"
			local addresid="resid"
			}
			if ("`regtype'"=="areg") {
			local regtype "areg"
			}
		}
		else {
			local regtype "reg"
		}
	
		if ("`binsreg_verbosity'"=="quietly") capture `regtype' `y_vars' `x_var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce'
			else capture noisily `regtype' `y_vars' `x_var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce'	
	mat binsreg_eb=e(b)
	mat binsreg_eV=e(V)
	mat mat_edf_r=e(df_r)
	local binsreg_edf_r=mat_edf_r[1,1]
	local binsreg_const=binsreg_eb[1,"_cons"] 	
	}
	*
	
	******  Create residuals  ******
	
	if (`"`controls'`absorb'"'!="") quietly {
	
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
		local absorb "absorb(`absorb')"
			if ("`regtype'"=="")|("`regtype'"=="reghdfe") {
			local regtype "reghdfe"
			local addresid="resid"
			}
			if ("`regtype'"=="areg") {
			local regtype "areg"
			}
		}
		else {
			local regtype "reg"
		}
		
		* Generate residuals
		local firstloop=1
		foreach var of varlist `x_var' `y_vars' {
			tempvar residvar 
			`regtype' `var' `controls' `wt' if `touse', `absorb' `addcluster' `addvce' `addresid' `addci'
			predict `residvar' if e(sample), residuals
			if ("`addmean'"!="noaddmean") {
				summarize `var' `wt' if `touse', meanonly
				replace `residvar'=`residvar'+r(mean)
			}
			
			label variable `residvar' "`var'"
			if `firstloop'==1 {
				local x_r `residvar'
				local firstloop=0
			}
			else local y_vars_r `y_vars_r' `residvar'
		}
		
	}
	else { 	/*absorb and controls both empty, no need for regression*/
		local x_r `x_var'
		local y_vars_r `y_vars'
	}
	****** Regressions for fit lines ******
	if ("`reportreg'"=="") | (("`reportreg'"!="") & ("`binsreg'"!="")) local reg_verbosity "quietly"

	if inlist("`linetype'","lfit","qfit") `reg_verbosity' {

		* If doing a quadratic fit, generate a quadratic term in x
		if "`linetype'"=="qfit" {
			tempvar x_r2
			gen `x_r2'=`x_r'^2
		}
		
		* Create matrices to hold regression results
		tempname e_b_temp
		forvalues i=1/`ynum' {
			tempname y`i'_coefs
		}
		
		* LOOP over by-vars
		local counter_by=1
		if ("`by'"=="") local noby="noby"
		foreach byval in `byvals' `noby' {
		
			* LOOP over rd intervals
			tokenize  "`rd'"
			local counter_rd=1	
				
			while ("`1'"!="" | `counter_rd'==1) {
			
				* display text headers
				if "`reportreg'"!="" {
					di "{txt}{hline}"
					if ("`by'"!="") {
						if ("`bylabel'"=="") di "-> `byvarname' = `byval'"
						else {
							di "-> `byvarname' = `: label `bylabel' `byval''"
						}
					}
					if ("`rd'"!="") {
						if (`counter_rd'==1) di "RD: `x_var'<=`1'"
						else if ("`2'"!="") di "RD: `x_var'>`1' & `x_var'<=`2'"
						else di "RD: `x_var'>`1'"
					}
				}
				
				* set conditions on reg
				local conds `touse'
				
				if ("`by'"!="" ) local conds `conds' & `by'==`byval'
				
				if ("`rd'"!="") {
					if (`counter_rd'==1) local conds `conds' & `x_r'<=`1'
					else if ("`2'"!="") local conds `conds' & `x_r'>`1' & `x_r'<=`2'
					else local conds `conds' & `x_r'>`1'
				}
					
				* LOOP over y-vars
				local counter_depvar=1
				foreach depvar of varlist `y_vars_r' {
				
					* display text headers
					if (`ynum'>1) {
						if ("`controls'`absorb'"!="") local depvar_name : var label `depvar'
						else local depvar_name `depvar'
						di as text "{bf:y_var = `depvar_name'}"
					}
					
					* perform regression
					if ("`reg_verbosity'"=="quietly") capture reg `depvar' `x_r2' `x_r' `wt' if `conds', `addcluster' `addvce'
					else capture noisily reg `depvar' `x_r2' `x_r' `wt' if `conds', `addcluster' `addvce'
					* store results
					if (_rc==0) matrix e_b_temp=e(b)
					else if (_rc==2000) {
						if ("`reg_verbosity'"=="quietly") di as error "no observations for one of the fit lines. add 'reportreg' for more info."
						
						if ("`linetype'"=="lfit") matrix e_b_temp=.,.
						else /*("`linetype'"=="qfit")*/ matrix e_b_temp=.,.,.
					}
					else {
						error _rc
						exit _rc
					}
					
					* relabel matrix row			
					if ("`by'"!="") matrix roweq e_b_temp = "by`counter_by'"
					if ("`rd'"!="") matrix rownames e_b_temp = "rd`counter_rd'"
					else matrix rownames e_b_temp = "="
					
					* save to y_var matrix
					if (`counter_by'==1 & `counter_rd'==1) matrix `y`counter_depvar'_coefs'=e_b_temp
					else matrix `y`counter_depvar'_coefs'=`y`counter_depvar'_coefs' \ e_b_temp
					
					* increment depvar counter
					local ++counter_depvar
				}
			
				* increment rd counter
				if (`counter_rd'!=1) mac shift
				local ++counter_rd
				
			}
			
			* increment by counter
			local ++counter_by
			
		}
	
		* relabel matrix column names
		forvalues i=1/`ynum' {
			if ("`linetype'"=="lfit") matrix colnames `y`i'_coefs' = "`x_var'" "_cons"
			else if ("`linetype'"=="qfit") matrix colnames `y`i'_coefs' = "`x_var'^2" "`x_var'" "_cons"
		}
	
	}

	******* Define the bins *******
		
	* Specify and/or create the xq var, as necessary
	if "`xq'"=="" {
		if !(`touse_first'==1 & word("`:sortedby'",1)=="`x_r'") sort `touse' `x_r'
	
		if "`discrete'"=="" { /* xq() and discrete are not specified */
			* Check whether the number of unique values > nquantiles, or <= nquantiles
			capture mata: characterize_unique_vals_sorted("`x_r'",`touse_first',`touse_last',`nquantiles')		
			if (_rc==0) { /* number of unique values <= nquantiles, set to discrete */
				local discrete discrete
				if ("`genxq'"!="") di as text `"note: the x-variable has fewer unique values than the number of bins specified (`nquantiles').  It will therefore be treated as discrete, and genxq() will be ignored"'

				local xq `x_r'
				local nquantiles=r(r)
				if ("`by'"=="") {
					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else if (_rc==134) { /* number of unique values > nquantiles, perform binning */
				if ("`genxq'"!="") local xq `genxq'
				else tempvar xq
	
				if ("`fastxtile'"!="nofastxtile") fastxtile `xq' = `x_r' `wt' in `touse_first'/`touse_last', nq(`nquantiles') randvar(`randvar') randcut(`randcut') randn(`randn')
				else xtile `xq' = `x_r' `wt' in `touse_first'/`touse_last', nq(`nquantiles')

				if ("`by'"=="") {
					mata: characterize_unique_vals_sorted("`xq'",`touse_first',`touse_last',`nquantiles')
					if (r(r)!=`nquantiles') {
						di as text "warning: nquantiles(`nquantiles') was specified, but only `r(r)' were generated. see help file under nquantiles() for explanation."
						local nquantiles=r(r)
					}

					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else {
				error _rc
			}

		}
		
		else { /* discrete is specified, xq() & genxq() are not */
		
			if ("`controls'`absorb'"!="") di as text "warning: discrete is specified in combination with controls() or absorb(). note that binning takes places after residualization, so the residualized x-variable may contain many more unique values."
			
			capture mata: characterize_unique_vals_sorted("`x_r'",`touse_first',`touse_last',`=`samplesize'/2')
		
			if (_rc==0) {
				local xq `x_r'
				local nquantiles=r(r)
				if ("`by'"=="") {
					tempname xq_boundaries xq_values
					matrix `xq_boundaries'=r(boundaries)		
					matrix `xq_values'=r(values)
				}
			}
			else if (_rc==134) {
				di as error "discrete specified, but number of unique values is > (sample size/2)"
				exit 134
			}
			else {
				error _rc
			}
		}
	}
	else {

		if !(`touse_first'==1 & word("`:sortedby'",1)=="`xq'") sort `touse' `xq'

		* set nquantiles & boundaries
		mata: characterize_unique_vals_sorted("`xq'",`touse_first',`touse_last',`=`samplesize'/2')

		if (_rc==0) {
			local nquantiles=r(r)  
			if ("`by'"=="") {
				tempname xq_boundaries xq_values
				matrix `xq_boundaries'=r(boundaries)		
				matrix `xq_values'=r(values)
			}
		}
		else if (_rc==134) {
			di as error "discrete specified, but number of unique values is > (sample size/2)"
			exit 134
		}
		else {
			error _rc
		}
	}

	********** Compute scatter points **********

	if ("`by'"!="") {
		sort `touse' `by' `xq'
		tempname by_boundaries
		mata: characterize_unique_vals_sorted("`by'",`touse_first',`touse_last',`bynum')
		matrix `by_boundaries'=r(boundaries)
	}

	forvalues b=1/`bynum' {
		if ("`by'"!="") {
			mata: characterize_unique_vals_sorted("`xq'",`=`by_boundaries'[`b',1]',`=`by_boundaries'[`b',2]',`nquantiles')
			tempname xq_boundaries xq_values
			matrix `xq_boundaries'=r(boundaries)
			matrix `xq_values'=r(values)
		}
		/* otherwise xq_boundaries and xq_values are defined above in the binning code block */

		* Define x-means
		tempname xbin_means
		if ("`discrete'"=="discrete") {
			matrix `xbin_means'=`xq_values'
		}
		else {
			means_in_boundaries `x_r' `wt', bounds(`xq_boundaries') `medians'
			matrix `xbin_means'=r(means)
		}

		* LOOP over y-vars to define y-means
		local counter_depvar=0
		foreach depvar of varlist `y_vars_r' {
			local ++counter_depvar

			means_in_boundaries `depvar' `wt', bounds(`xq_boundaries') `medians'

			* store to matrix
			if (`b'==1) {
				tempname y`counter_depvar'_scatterpts
				matrix `y`counter_depvar'_scatterpts' = `xbin_means',r(means)
			}
			else {
				* make matrices conformable before right appending			
				local rowdiff=rowsof(`y`counter_depvar'_scatterpts')-rowsof(`xbin_means')
				if (`rowdiff'==0) matrix `y`counter_depvar'_scatterpts' = `y`counter_depvar'_scatterpts',`xbin_means',r(means)
				else if (`rowdiff'>0)  matrix `y`counter_depvar'_scatterpts' = `y`counter_depvar'_scatterpts', ( (`xbin_means',r(means)) \ J(`rowdiff',2,.) )
				else /*(`rowdiff'<0)*/ matrix `y`counter_depvar'_scatterpts' = ( `y`counter_depvar'_scatterpts' \ J(-`rowdiff',colsof(`y`counter_depvar'_scatterpts'),.) ) ,`xbin_means',r(means)
			}
		}
	}


	*********** With Binsreg ***********
	if "`binsreg'"=="binsreg" {	/* This saves the matrix of scatterpoints from binsreg and replaces the one from binscatterhist */	
	if "`nquantiles'"!="" local addnbins "nbins(`nquantiles')"
	tempfile binsreg_points
	qui binsreg `y_vars' `x_var' `controls' `wt' if `touse', `addnbins' savedata(`binsreg_points')
	preserve
	use `binsreg_points', clear
	mkmat dots_x dots_fit, matrix(`y1_scatterpts')
	qui ereplace dots_binid=max(dots_binid)
	local binsreg_nbins=dots_binid[1]
	restore
		if "`binsreg_nbins'"!="`nquantiles'" {
		di as error "Binsreg is selecting `binsreg_nbins' bins, update your nquantiles option or replace with `binsreg_nbins'"
		exit
		}
	}	
	*

	*********** Perform Graphing ***********

	* If rd is specified, prepare xline parameters
	if "`rd'"!="" {
		foreach xval in "`rd'" {
			local xlines `xlines' xline(`xval', lpattern(dash) lcolor(gs8))
		}
	}

	* Fill colors if missing
	if `"`colors'"'=="" local colors ///
		navy maroon forest_green dkorange teal cranberry lavender ///
		khaki sienna emidblue emerald brown erose gold bluishgray ///
		/* lime magenta cyan pink blue */
	if `"`mcolors'"'=="" {
		if (`ynum'==1 & `bynum'==1 & "`linetype'"!="connect") local mcolors `: word 1 of `colors''
		else local mcolors `colors'
	}
	if `"`lcolors'"'=="" {
		if (`ynum'==1 & `bynum'==1 & "`linetype'"!="connect") local lcolors `: word 2 of `colors''
		else local lcolors `colors'
	}
	local num_mcolor=wordcount(`"`mcolors'"')
	local num_lcolor=wordcount(`"`lcolors'"')


	* Prepare connect & msymbol options
	if ("`linetype'"=="connect") local connect "c(l)"
	if "`msymbols'"!="" {
		local symbol_prefix "msymbol("
		local symbol_suffix ")"
	}
	
	*** Prepare scatters
	
	* c indexes which color is to be used
	local c=0
	
	local counter_series=0
	
	* LOOP over by-vars
	local counter_by=0
	if ("`by'"=="") local noby="noby"
	foreach byval in `byvals' `noby' {
		local ++counter_by
		
		local xind=`counter_by'*2-1
		local yind=`counter_by'*2

		* LOOP over y-vars
		local counter_depvar=0
		foreach depvar of varlist `y_vars' {
			local ++counter_depvar
			local ++c
			
			* LOOP over rows (each row contains a coordinate pair)
			local row=1
			local xval=`y`counter_depvar'_scatterpts'[`row',`xind']
			local yval=`y`counter_depvar'_scatterpts'[`row',`yind']
			
			if !missing(`xval',`yval') {
				local ++counter_series
				local scatters `scatters' (scatteri
				if ("`savedata'"!="") {
					if ("`by'"=="") local savedata_scatters `savedata_scatters' (scatter `depvar' `x_var'
					else local savedata_scatters `savedata_scatters' (scatter `depvar'_by`counter_by' `x_var'_by`counter_by'
				}
			}
			else {
				* skip the rest of this loop iteration
				continue
			}
			
			while (`xval'!=. & `yval'!=.) {
				local scatters `scatters' `yval' `xval'
			
				local ++row
				local xval=`y`counter_depvar'_scatterpts'[`row',`xind']
				local yval=`y`counter_depvar'_scatterpts'[`row',`yind']
			}
			
			* Add options
			if ("`msize'"!="") local addmsize "msize(`msize')" /* to add msize to the scatter options in the scatters local */
			local scatter_options `connect' `addmsize' mcolor(`: word `c' of `mcolors'') lcolor(`: word `c' of `lcolors'') `symbol_prefix'`msymbols'`symbol_suffix'
			local scatters `scatters', `scatter_options')
			if ("`savedata'"!="") local savedata_scatters `savedata_scatters', `scatter_options')
		

		}
		
	}
	addplot: `scatters', norescaling

end
*********************