/* 
	cmogram y x [ , options ]
	
	Draw histogram-style conditional mean or median graph.
	
	by Christopher Robert, Harvard Kennedy School, chris_robert@hksphd.harvard.edu
	
	v1.11, September 6, 2011
*/	

program define cmogram, rclass
	syntax varlist(min=2 max=2) [if] [, Histopts(string) CONtrols(string) CONTROLvars(string) CUTpoint(string) CUTRight CI(string) MEDian LEGend COUNT FRACtion NOTEn NOTEPFX(string) NOTESFX(string) NONotes Lineat(string) TITle(string) Graphopts(string) SAVing(string) GENerate(string) BY(string) BYValues(string) BYTitle(string) SCatter LFit LFITCi QFit QFITCi LOWess LFITOpts(string) CIOpts(string) RCAPOpts(string) LOWOpts(string) GRAPHOPTS1(string) GRAPHOPTS2(string) GRAPHOPTS3(string) GRAPHOPTS4(string) LFITOPTS1(string) LFITOPTS2(string) LFITOPTS3(string) LFITOPTS4(string) RCAPOPTS1(string) RCAPOPTS2(string) RCAPOPTS3(string) RCAPOPTS4(string) LOWOPTS1(string) LOWOPTS2(string) LOWOPTS3(string) LOWOPTS4(string) TITLE1(string) TITLE2(string) TITLE3(string) TITLE4(string)]
	version 9.2
	marksample marked, strok
	quietly {
		* initialize variables
		tokenize `varlist'
		local yvar "`1'"
		local xvar "`2'"
		local xvarLabel : variable label `xvar'
		if "`xvarLabel'"=="" {
			local xvarLabel="`xvar'"
		}
		local yvarLabel : variable label `yvar'
		if "`yvarLabel'"=="" {
			local yvarLabel="`yvar'"
		}
		if "`controls'"~="" {
			local controlvars="`controls'"
			local controldesc=", controlling for '`controls''"
		}
		else {
			local controldesc=""
		}
		if "`median'"~="" {
			local oper="median"
		}
		else if "`fraction'"~="" {
			local oper="fraction"
		}
		else if "`count'"~="" {
			local oper="count"
		}
		else if "`ci'"~="" {
			local oper="confidence intervals"
		}
		else {
			local oper="mean"
		}
		if "`by'"~="" {
			local bydesc=", by '`by''"
		}
		else {
			local bydesc=""
		}
		if "`legend'"~="" {
			local legendoff=""
		}
		else {
			local legendoff="legend(off)"
		}

		* show overall output header
		count if `marked'
		local totcount0=r(N)
		noisily disp ""
		noisily disp "Plotting `oper' of `yvar', conditional on `xvar'`bydesc'`controldesc'."
		noisily disp ""
		noisily disp "n = `totcount0'"
		noisily disp ""

		* run regression if controlling; set y axis label
		if "`controlvars'"~="" {
			reg `yvar' `controlvars' if `marked'
			tempvar _resid
			predict `_resid', resid
			local yvar="`_resid'"

			if "`median'"~="" {
				local ylabel="Median residual of `yvarLabel'"
			}
			else if "`count'"~="" {
				local ylabel="Frequency"
			}
			else if "`fraction'"~="" {
				local ylabel="Proportion"
			}
			else {
				local ylabel="Mean residual of `yvarLabel'"
			}
		}
		else {
			if "`median'"~="" {
				local ylabel="Median of `yvarLabel'"
			}
			else if "`count'"~="" {
				local ylabel="Frequency"
			}
			else if "`fraction'"~="" {
				local ylabel="Proportion"
			}
			else {
				local ylabel="Mean of `yvarLabel'"
			}
		}
		
		* loop through and set up bins, potentially by sub-group
		if "`by'"~="" {
			if "`byvalues'"~="" {
				local byvals="`byvalues'"
			}
			else {
				levelsof `by', local(byvals)
			}
		}
		else {
			local byvals="1"
		}
		local miny=999999
		local maxy=-999999
		local num0=0
		local num1=1
		foreach byval of local byvals {
			if "`by'"~="" {
				local byif=" & `by'==`byval'"
				cap: count if `marked' `byif'
				if _rc~=0 {
					local byif=`" & `by'=="`byval'""'
					local bystg="1"
					count if `marked' `byif'
				}
				local totcount`num0'=r(N)
				if `num0'>0 {
					noisily disp ""
				}
				noisily disp "`by'==`byval' (n = `totcount`num0'')"
				noisily disp ""
			}
			else {
				local byif=""
			}
			* define bins and specify their heights
			forvalues right=`num0'/`num1' {
				tempvar _bn`right'
				gen `_bn`right''=.
				if `right'==`num0' | "`cutpoint'"~="" {
					tempvar _y`right' _x`right' _yh`right' _yl`right'
					gen `_yh`right''=.
					gen `_yl`right''=.
					if "`cutpoint'"~="" {
						count if `marked' & `xvar'==`cutpoint' `byif'
						local atcut=r(N)
						if `right'==`num0' {
							twoway__histogram_gen `xvar' if `marked' & `xvar'<=`cutpoint' `byif', freq `histopts' gen(`_y`right'' `_x`right'') display
						}
						else {
							twoway__histogram_gen `xvar' if `marked' & `xvar'>=`cutpoint' `byif', freq start(`cutpoint') `histopts' gen(`_y`right'' `_x`right'') display
							* note that the left and right both include the cut-point; the bar heights will be adjusted below
						}
					}
					else {
						local atcut=0
						twoway__histogram_gen `xvar' if `marked' `byif', freq `histopts' gen(`_y`right'' `_x`right'') display
					}
					local nbins=r(bin)
					local binstart=r(start)
					local binwidth`right'=r(width)
					local binleft=`binstart'
					local lastmax=r(max)
					
					* check for and manage missing (empty) bins
					replace `_x`right''=. if `_y`right''==.
					count if `_x`right''<.
					if r(N) < `nbins' {
						* fill in missing bins
						sort `_x`right''
						local binmid=(`binstart'+`binwidth`right''/2)
						forvalues bn=1/`nbins' {
							count if `_x`right''>(`binmid'-`binwidth`right''/2) & `_x`right''<(`binmid'+`binwidth`right''/2)
							if r(N) < 1 {
								replace `_x`right''=`binmid' if _n==(`nbins'+`bn')
								replace `_y`right''=0 if _n==(`nbins'+`bn')
							}
							local binmid=(`binmid'+`binwidth`right'')
						}
					}
					
					sort `_x`right''
					replace `_bn`right''=_n
					forvalues bn=1/`nbins' {
						local binright=`binleft'+`binwidth`right''
						* round up for last bin, to be sure to catch boundary points
						if `bn'==`nbins' {
							local binright=`lastmax'
						}
						if "`ci'"~="" {
							local sumcmd="ci"
							local sumopt="level(`ci') `ciopts'"
						}
						else {
							local sumcmd="sum"
							local sumopt="d"
						}
						if "`cutright'"=="" {
							if (`right'==`num0' & `bn'==1) | (`right'==`num1' & `bn'==1 & `atcut'==0) {
								`sumcmd' `yvar' if `xvar'>=`binleft' & `xvar'<=`binright' & `marked' `byif', `sumopt'
								local rangedesc="[`binleft',`binright']"
							}
							else {
								`sumcmd' `yvar' if `xvar'>`binleft' & `xvar'<=`binright' & `marked' `byif', `sumopt'
								local rangedesc="(`binleft',`binright']"
							}
						}
						else {
							if (`right'==`num1' & `bn'==`nbins') | ("`cutright'"=="" & `right'==`num0' & `bn'==`nbins') {
								`sumcmd' `yvar' if `xvar'>=`binleft' & `xvar'<=`binright' & `marked' `byif', `sumopt'
								local rangedesc="[`binleft',`binright']"
							}
							else {
								`sumcmd' `yvar' if `xvar'>=`binleft' & `xvar'<`binright' & `marked' `byif', `sumopt'
								local rangedesc="[`binleft',`binright')"
							}
						}
						local rangen=r(N)
						if "`median'"~="" {
							local rangeydesc="median"
							local rangey=r(p50)
						}
						else if "`count'"~="" {
							local rangeydesc="count"
							local rangey=r(N)
						}
						else if "`fraction'"~="" {
							local rangeydesc="fraction"
							local rangey=r(N)/`totcount`num0''
						}
						else {
							local rangeydesc="mean"
							local rangey=r(mean)
						}
						if "`ci'"~="" {
							local cih=r(ub)
							local cil=r(lb)
							replace `_y`right''=`rangey' if `_bn`right''==`bn'
							replace `_yh`right''=`cih' if `_bn`right''==`bn'
							replace `_yl`right''=`cil' if `_bn`right''==`bn'
							if `cil'<`miny' {
								local miny=`cil'
							}
							if `cih'>`maxy' & `cih'<. {
								local maxy=`cih'
							}
							noisily disp "Bin #`bn': `rangedesc' (n = `rangen') (`rangeydesc' = `rangey'; CI = (`cil',`cih'))"
						}
						else {
							replace `_y`right''=`rangey' if `_bn`right''==`bn'
							if `rangey'<`miny' {
								local miny=`rangey'
							}
							if `rangey'>`maxy' & `rangey'<. {
								local maxy=`rangey'
							}
							noisily disp "Bin #`bn': `rangedesc' (n = `rangen') (`rangeydesc' = `rangey')"
						}
						
						local binleft=`binright'
					}
				}
			}
		
			local num0=`num0'+2
			local num1=`num1'+2
		}	
			
		* output graph(s)	and save results
		return clear
		local graphnames=""
		local num0=0
		local num1=1
		local graphno1=1
		local graphno2=2
		local titleno=1
		foreach byval of local byvals {
			if "`by'"~="" {
				if "`bystg'"=="1" {
					local byif=`" & `by'=="`byval'""'
				}
				else {
					local byif=" & `by'==`byval'"
				}
			}
			else {
				local byif=""
			}
			* assemble code to draw (and possibly save) graph
			local graphname="_graph`num0'"
			local nameopts="name(`graphname')"
			cap: graph drop `graphname'
			if "`lineat'"=="" {
				local lineopts ""
			}
			else {
				local lineopts "xline(`lineat', lpattern(dash))"
			}
			* define a y axis, making it common across sub-groups
			if "`by'"~="" | ("`count'"=="" & "`fraction'"=="") {
				local diff=(`maxy'-`miny')
				if `diff'<0.0005 {
					local roundingto=0.00001
					local decimals=5
				}
				else if `diff'<0.005 {
					local roundingto=0.0001
					local decimals=4
				}
				else if `diff'<0.05 {
					local roundingto=0.001
					local decimals=3
				}
				else if `diff'<0.5 {
					local roundingto=0.01
					local decimals=2
				}
				else if `diff'<5 {
					local roundingto=0.1
					local decimals=1
				}
				else {
					local roundingto=1
					local decimals=0
				}
				if "`count'"~="" | "`fraction'"~="" {
					local floor=0
				}
				else {
					local floor=round(`miny'-(`maxy'-`miny')/16,`roundingto')
					if `floor'<0 & `miny'>=0 {
						local floor=0
					}
				}
				local ceil=round(`maxy',`roundingto')
				local ystep=(`ceil'-`floor')/4
				local ceil=round(`maxy',`roundingto')
				local bottom=min(`miny',`floor')
				local top=max(`maxy',`ceil')
				if "`scatter'"~="" {
					local yrangeopts "ylabel(`floor'(`ystep')`ceil', format(%9.`decimals'f)) yscale(range(`bottom' `top'))"
				}
				else {
					local yrangeopts "base(`bottom') ylabel(`floor'(`ystep')`ceil', format(%9.`decimals'f)) yscale(range(`bottom' `top'))"
				}
			}
			else {
				local yrangeopts "yscale(range(0 .)) ylabel(#7)"
			}
			if "`by'"~="" & "`nonotes'"=="" {
				if "`noten'"~="" {
					local noteopt=`"note("`notepfx'`by'=`byval', n=`totcount`num0''`notesfx'")"' 
				}
				else {
					local noteopt=`"note(`notepfx'"`by'=`byval'`notesfx'")"' 
				}
			}
			else if "`noten'"~="" & "`nonotes'"=="" {
				local noteopt=`"note("`notepfx'n=`totcount`num0''`notesfx'")"'
			}
			else {
				local noteopt=""
			}
			
			if "`scatter'"~="" {
				local gtype="scatter"
				local barwid0=""
				local barwid1=""
			}
			else {
				local gtype="bar"
				local barwid0="barwidth(`binwidth`num0'')"
				local barwid1="barwidth(`binwidth`num1'')"
			}
			
			* potentially add line(s) of best fit
			if "`lfit'"~="" | "`lfitci'"~="" | "`qfit'"~="" | "`qfitci'"~="" {
				local lfplot=""
				if "`qfitci'"~="" {
					local lfcmd="qfitci"
					if strpos("`lfitopts'","cip") == 0 {
						local lfplot="ciplot(rline)"
					}
				}
				else if "`qfit'"~=""{
					local lfcmd="qfit"
				}
				else if "`lfitci'"~="" {
					local lfcmd="lfitci"
					if strpos("`lfitopts'","cip") == 0 {
						local lfplot="ciplot(rline)"
					}
				}
				else {
					local lfcmd="lfit"
				}
				if "`cutpoint'"~="" {
					if "`cutright'"~="" {
						local lfopt1=`"|| `lfcmd' `yvar' `xvar' if `marked' & `xvar'<`cutpoint' `byif', `legendoff' range(. `cutpoint') `lfplot' `lfitopts' `lfitopts`graphno1''"'
						local lfopt2=`"|| `lfcmd' `yvar' `xvar' if `marked' & `xvar'>=`cutpoint' `byif', `legendoff' range(`cutpoint' .) `lfplot' `lfitopts' `lfitopts`graphno2''"'
					}
					else {
						local lfopt1=`"|| `lfcmd' `yvar' `xvar' if `marked' & `xvar'<=`cutpoint' `byif', `legendoff' range(. `cutpoint') `lfplot' `lfitopts' `lfitopts`graphno1''"'
						local lfopt2=`"|| `lfcmd' `yvar' `xvar' if `marked' & `xvar'>`cutpoint' `byif', `legendoff' range(`cutpoint' .) `lfplot' `lfitopts' `lfitopts`graphno2''"'
					}
				}
				else {
					local lfopt1=`"|| `lfcmd' `yvar' `xvar' if `marked' `byif', `legendoff' `lfplot' `lfitopts' `lfitopts`graphno1''"'
					local lfopt2=""
				}
			}
			else {
				local lfopt1=""
				local lfopt2=""
			}
			
			* potentially add lowess plot
			if "`lowess'"~="" {
				if "`cutpoint'"~="" {
					if "`cutright'"~="" {
						local lowopt1=`"|| lowess `yvar' `xvar' if `marked' & `xvar'<`cutpoint' `byif', `legendoff' `lowopts' `lowopts`graphno1''"'
						local lowopt2=`"|| lowess `yvar' `xvar' if `marked' & `xvar'>=`cutpoint' `byif', `legendoff' `lowopts' `lowopts`graphno2''"'
					}
					else {
						local lowopt1=`"|| lowess `yvar' `xvar' if `marked' & `xvar'<=`cutpoint' `byif', `legendoff' `lowopts' `lowopts`graphno1''"'
						local lowopt2=`"|| lowess `yvar' `xvar' if `marked' & `xvar'>`cutpoint' `byif', `legendoff' `lowopts' `lowopts`graphno2''"'
					}
				}
				else {
					local lowopt1=`"|| lowess `yvar' `xvar' if `marked' `byif', `legendoff' `lowopts' `lowopts`graphno1''"'
					local lowopt2=""
				}
			}
			else {
				local lowopt1=""
				local lowopt2=""
			}
			
			* potentially add confidence intervals
			if "`ci'"~="" {
				if "`cutpoint'"~="" {
					local ciopt1="|| rcap `_yl`num0'' `_yh`num0'' `_x`num0'', `legendoff' `rcapopts' `rcapopts`graphno1''"
					local ciopt2="|| rcap `_yl`num1'' `_yh`num1'' `_x`num1'', `legendoff' `rcapopts' `rcapopts`graphno2''"
				}
				else {
					local ciopt1="|| rcap `_yl`num0'' `_yh`num0'' `_x`num0'', `legendoff' `rcapopts' `rcapopts`graphno1''"
					local ciopt2=""
				}
			}
			else {
				local ciopt1=""
				local ciopt2=""
			}
			
			* potentially adjust title
			if "`title`titleno''"~="" {
				local titleval="`title`titleno''"
			}
			else {
				local titleval="`title'"
			}
			
			* actually draw a graph
			if "`cutpoint'"~="" {
				twoway `gtype' `_y`num0'' `_x`num0'', `barwid0' `yrangeopts' `graphopts' `graphopts`graphno1'' || `gtype' `_y`num1'' `_x`num1'', `barwid1' title("`titleval'") ytitle("`ylabel'") graphregion(fcolor(white)) `legendoff' `noteopt' `nameopts' `lineopts' `yrangeopts' `graphopts' `graphopts`graphno2'' `lfopt1' `lfopt2' `ciopt1' `ciopt2' `lowopt1' `lowopt2'
				if "`generate'"~="" {
					cap: gen `generate'x`num0'=.
					cap: gen `generate'y`num0'=.
					cap: gen `generate'x`num1'=.
					cap: gen `generate'y`num1'=.
					replace `generate'x`num0'=`_x`num0''
					replace `generate'y`num0'=`_y`num0''
					replace `generate'x`num1'=`_x`num1''
					replace `generate'y`num1'=`_y`num1''
				}
				return scalar bw`num0'=`binwidth`num0''
				return scalar bw`num1'=`binwidth`num1''
				local graphno1=`graphno1'+2
				local graphno2=`graphno2'+2
			}
			else {
				twoway `gtype' `_y`num0'' `_x`num0'', `barwid0' title("`titleval'") ytitle("`ylabel'") graphregion(fcolor(white)) `noteopt' `nameopts' `lineopts' `yrangeopts' `graphopts' `graphopts`graphno1'' `lfopt1' `lfopt2' `ciopt1' `ciopt2' `lowopt1' `lowopt2'
				if "`generate'"~="" {
					cap: gen `generate'x`num0'=.
					cap: gen `generate'y`num0'=.
					replace `generate'x`num0'=`_x`num0''
					replace `generate'y`num0'=`_y`num0''
				}
				return scalar bw`num0'=`binwidth`num0''
				local graphno1=`graphno1'+1
				local graphno2=`graphno2'+1
			}
			
			* optionally save a graph
			local graphnames="`graphnames' `graphname'"
			if "`saving'"~="" & "`by'"=="" {
				graph export "`saving'", name(`graphname') replace
				graph drop `graphname'
			}
			
			local num0=`num0'+2
			local num1=`num1'+2
			local titleno=`titleno'+1
		}

		* if using by option, combine graphs together and possibly save
		if "`by'"~="" {
			cap: graph drop combined
			graph combine `graphnames', name(combined) title("`bytitle'")
			graph drop `graphnames'
			if "`saving'"~="" {
				graph export "`saving'", name(combined) replace
				graph drop combined
			}
		}
	}
end