* Three-way to Excel (tw3xls) is a convenient tool to 
* export three-way frequency tables into Excel.
* It allows to pre-format and easily save data into a spreadsheet.
* Performance depends on disk read/write speed, number of tables (when in conjuction with -by-) and formatting options


*! tw3xls - Export three-way tables into Excel with formatting
*! Andrey Ampilogov, a.ampilogov@gmail.com
*! v1.2, 26-Aug-2017
* v1.1, 20-Apr-2017
* v1.0, 08-Nov-2016

cap program drop tw3xls

program define tw3xls, sortpreserve rclass
	version 14.2						//lowering the version will broke formatting of the putexcel commands, for example merge not working
	syntax varlist(min=3 max=3) [if] [in] using/, [ by(varname) sheet(string) STub(string) ///
		MIssing(real 12344) Format(string) sort(string) top(real 12344) Total(string) show replace modify]
	
	marksample touse
	token `varlist' 

	*~~~~~~~~~~~~~~~ preliminary ~~~~~~~~~~~~~~~; takes 0.05 sec
	* --> check if a `by' variable is present and save its type (string/numeric)
	local 4 `by'
	cap confirm variable `4'
	if _rc local four = -1
	else {
		cap confirm numeric variable `4'
		if !_rc local four = 0
		else    local four = 1
	}
	local varlist "`varlist' `4'"
	
	* --> create temporary varialbes
	foreach var of local varlist {
		tempvar levels_`var' 			//levels for each variable value (1,2,3,etc)
		tempname `var'_cnt   			//scalar for the number of levels of each variable
		tempfile labels					//temporary dataset whcich contains labels for rwos and corresponding values
	}
	tempvar freq group					//frequency values
	tempname _freq_scalar cols	rows	//frequency scalar = values of matrices, cols and rows = size of output matrix
	
	* --> save levels of each variable and its dimension (count of unique values)
	foreach var of local varlist {
		scalar ``var'_cnt' = 0
		gen `levels_`var'' = 0 
		
		qui levelsof `var', local(list`var')
		foreach x of local list`var' { 
			scalar ``var'_cnt' = ``var'_cnt' + 1
			cap confirm string variable `var'
			if _rc qui replace `levels_`var'' = ``var'_cnt' if `var' == `x'
			else   qui replace `levels_`var'' = ``var'_cnt' if `var' == "`x'"
		}
	}
	
	* --> save the size of tables/matrices and their elements = frequencies
	scalar `cols' = ``2'_cnt' * ``3'_cnt'
	scalar `rows' = ``1'_cnt'
	qui bysort `varlist': gen `freq' = _N 
	
	* --> label variables if there are no labels
	foreach var of local varlist {
		local l`var' : variable label `var' 			//save empty lables for a restore after program finishes
		local a: di `"`: var label `var''"' 
		if "`a'"=="" 	label variable `var' "`var'"	//label variables
	}
	
	
	*~~~~~~~~~~~~~~~ matrix generetaion ~~~~~~~~~~~~~~~; takes 0.5 sec
	* --> generate matrices with frequencies for each value of `4'th variable
	* -contract- is much faster than looping over three levelsof with -summ `freq' if ... - ; -r(N)-
	if `four' == -1 local list`4' = 1
	foreach y of local list`4' {
		//di "Category writen: `y'"
		
		local matcnt = `matcnt' + 1
		tempname U`matcnt'
	
		preserve
	* --> generate a frequencies dataset
		contract `varlist' `if' `in', zero freq(_freq)
		
		if `four' == -1 local happy 1
		else if `four' == 0 qui keep if `4' == `y'
		else if `four' == 1 qui keep if `4' == "`y'"
		
		** some obs left after the -keep- command
		if _N > 0 {
			qui egen `group' = group(`2' `3')
			qui egen  index  = group(`1')
			qui cap drop if `group'==. | index==.
			qui keep `group' _freq index
		
			qui reshape wide _freq, j(`group') i(index)
			* keep encoded id (index) - adjust it in mergingcells later
			order index, first
	* --> set zero freq to missing or another number; less than 0.01 sec of computing
			if `missing' == 12344  qui recode * (0=.)					// default option
			else if `missing' != 0 qui recode * (0=`missing')			// user-defined
	* --> save the matrix
			mkmat _all, mat(`U`matcnt'')
		}
		** in case no obs left 
		else {
			if `missing' == 12344 {
				matrix `U`matcnt'' = J(`=scalar(`rows')',`=scalar(`cols')'+1,.)
			}
			else if `missing' == 0 {
				matrix `U`matcnt'' = J(`=scalar(`rows')',`=scalar(`cols')'+1,0)
			}
			else {
				matrix `U`matcnt'' = J(`=scalar(`rows')',`=scalar(`cols')'+1,`missing')
			}
			//mat list  `U`matcnt''
		}
		mata: `U`matcnt'' = st_matrix("`U`matcnt''")				//copy Stata matrix to mata
		restore
	}
	
	
	*~~~~~~~~~~~~~~~ sort ~~~~~~~~~~~~~~~; 
	* sort tables by rowtot (in descending order)
	if "`sort'" != "" | `top'!=12344 { 
	* --> check entered options
		* for more than one options
		if `: word count `sort'' > 1 { 
			di as err "Please select only one sorting option: h(igh)est first or l(ow)est first"
			exit
		} 
		* for not listed options
		local sortlist "h hi hig high l lo low" 
		local sort1 = "`sort'"				//sort is a special word and usage of it gives an error in a list checking
		if !`: list sort1 in sortlist' { 
			di as err "Sorting option (`sort1') is unavailable. Please select a valid sorting: h(igh)est first or l(ow)est first"
			exit
		}
		local sort = substr("`sort'",1,1) 

	* --> save a dataset with lalbes to merge it in the end
		preserve
		qui egen index = group(`1')
		keep `1' index
		ren `1' rowlabel
		qui duplicates drop
		qui save "`labels'", replace
		restore
	}


	*~~~~~~~~~~~~~~~ TOTAL ~~~~~~~~~~~~~~~; 	
	if "`total'" != "" | "`sort'" != "" {	
	* --> check entered options
		* for more than one options
		if `: word count `total'' > 2 { 
			di as err "Please select one or two total options: r(ows) and/or c(ols)"
			exit
		} 
		* for not listed options
		local totallist = "r ro row rows c co col cols" 
		if !`: list total in totallist' { 
			di as err "Total option (`total') is unavailable. Please select a valid total option(s): r(ows) and/or c(ols)"
			exit
		}
		* check what totals to prepare
		local totallist1 = "c co col cols" 
		local totallist2 = "r ro row rows"
		if !`: list total in totallist1' { 
			local rowtot = "yes"
		}		
		if !`: list total in totallist2' { 
			local coltot = "yes"
		}

	* --> prepare matrices with rows/columns totals
		local matcnt = 0
		foreach y of local list`4' {
			local matcnt = `matcnt' + 1
			
			if "`coltot'" != "" & ("`sort'" != "" | "`rowtot'" != "" | `top'!=12344) {
				mata: s1 = sum(`U`matcnt'')			//workaround for zero matrices
				mata: st_local("s1", strofreal(s1))

				* get row sum
				mata: `U`matcnt'' = `U`matcnt'' , rowsum(`U`matcnt'')
				* adjust rowsum by index value by substracting the first column (or rowid)
				if `s1' > 0 {
				mata: for (i=1; i<=rows(`U`matcnt''); i++) `U`matcnt''[i,cols(`U`matcnt'')] = `U`matcnt''[i,cols(`U`matcnt'')] - i
				}
				* get column sum
				mata: `U`matcnt'' = `U`matcnt'' \ colsum(`U`matcnt'')				
			}
			else if "`sort'" != "" | "`rowtot'" != "" | `top'!=12344 {
				mata: s1 = sum(`U`matcnt'')
				mata: st_local("s1", strofreal(s1))
				
				* get row sum
				if `s1' > 0 {
				mata: `U`matcnt'' = `U`matcnt'' , rowsum(`U`matcnt'')
				}
				* adjust rowsum by index value by substracting the first column (or rowid)
				mata: for (i=1; i<=rows(`U`matcnt''); i++) `U`matcnt''[i,cols(`U`matcnt'')] = `U`matcnt''[i,cols(`U`matcnt'')] - i
			}
			else if "`coltot'" != "" {
				* get column sum
				mata: `U`matcnt'' = `U`matcnt'' \ colsum(`U`matcnt'')
			}
		}
	}

	
	*~~~~~~~~~~~~~~~ Servicing - label matrices ~~~~~~~~~~~~~~~; 	
	* --> prepare matrices with labels for row (rowl), columns (coll), supercolumns (scoll)  
	* duplicate each column labels by the number of supercolumns
	foreach collab of local list`2' {
		local mult = ""			
		forval i = 1/`=scalar(``3'_cnt')' {
			local mult = "`mult' `collab'"
		}
		local list`2'_2 "`list`2'_2' `mult'"
	}
	* generate string matrices with labels
	mata:	row_m  = tokens(st_local("list`1'"))
	mata:	col_m  = tokens(st_local("list`2'"))
	mata:	scol_m = tokens(st_local("list`3'"))
	mata:	rowl   = row_m'
	mata:	coll   = J(1, length(col_m), scol_m)
	mata:	scoll  = tokens(st_local("list`2'_2"))
	* Find the last column and its letter in Excel
	mata: 	st_matrix("`U`matcnt''", `U`matcnt'')			//copy mata matrix back to Stata
	local 	cols   = colsof(`U`matcnt'')
	local 	rows   = rowsof(`U`matcnt'')
	mata: 	st_local("rcolf", numtobase26(`cols'))
	
	

	
	*~~~~~~~~~~~~~~~ top(n) ~~~~~~~~~~~~~~~; 	
	* --> selects top N cases from the top (top depends on sorting: high or low)
	* adjust number of rows; more adjustments are inline with other code
	if `top'!=12344 {
		local coltotrow = 0
		if "`coltot'" != "" local coltotrow = 1
		local rows = `top' + `coltotrow'
	}
	

	*~~~~~~~~~~~~~~~ export to Excel ~~~~~~~~~~~~~~~; 
	* --> export both data and labels to Excel; that is the most intense part due to write opearions; takes ~90% computational time
	* Set default sheet name
	if "`sheet'" == "" local sheet = "Data"
	
	* Replace or modify options
	if "`replace'" == "" & "`modify'" == "" local replace = "replace"
	
	* Prepare an Excel file
	local ext = substr("`using'",-4,.)
	//writing into xls is faster than to xlsx but formatting (merged cells, borders) doesn't work as it should
	if !inlist("`ext'", "xlsx", ".xls")  	local using "`using'.xlsx"	
	qui putexcel clear
	qui putexcel set `"`using'"', sheet("`sheet'") `replace' `modify'

	
	* iterate for each table
	if "`sort'" != "" | `top'!=12344 preserve
	local matcnt = 0
	foreach y of local list`4' {
	* --> Prepare paste positions
		local matcnt = `matcnt' + 1
		mata: st_matrix("`U`matcnt''", `U`matcnt'')			//copy mata matrix back to Stata
		
		* set positions to paste the data in the spreadsheet
		local gap = 5 
		local startrow  = (`matcnt' - 1) * (`rows' + `gap') + 1
		local secondrow = `startrow' + 1	
		local thirdrow  = `startrow' + 2
		local fourthrow = `startrow' + 3
		local fifthrow  = `startrow' + 4
		local lastrow   = `secondrow' + `rows' + `gap' - 3	

		* save row positions into mata vectors
		mata: row1 = strtoreal(st_local("startrow"))
		mata: row2 = strtoreal(st_local("secondrow"))
		mata: row3 = strtoreal(st_local("thirdrow"))
		mata: row4 = strtoreal(st_local("fourthrow"))
		mata: row5 = strtoreal(st_local("fifthrow"))
		mata: rowN = strtoreal(st_local("lastrow"))

		* define table header(s)
		if `four' != -1   local title = "Table for `4' = `y'"
		else    		 		local title = ""

		
	* --> write data description (titles) and mata matrices with values
		if      "`sort'" == "" & `top'==12344 & "`rowtot'" == "" & "`coltot'" == "" {
			qui putexcel A`startrow'  = ("`title'") ///
						 A`secondrow' = ("`: var label `1''") ///
						 B`secondrow' = ("`: var label `2'' and `: var label `3''") ///
						 A`fifthrow'  = matrix(`U`matcnt'') 
		}
		else if "`sort'" == "" & `top'==12344 & "`rowtot'" != "" & "`coltot'" != "" {
			qui putexcel A`startrow'  = ("`title'") ///
						 A`secondrow' = ("`: var label `1''") ///
						 B`secondrow' = ("`: var label `2'' and `: var label `3''") ///
						 A`fifthrow'  = matrix(`U`matcnt'') ///
						 A`lastrow'   = ("Total") ///
						 `rcolf'`thirdrow' = ("Total")
						 						
		}
		else if "`sort'" == "" & `top'==12344 & "`rowtot'" != "" & "`coltot'" == "" {
			qui putexcel A`startrow'  = ("`title'") ///
						 A`secondrow' = ("`: var label `1''") ///
						 B`secondrow' = ("`: var label `2'' and `: var label `3''") ///
						 A`fifthrow'  = matrix(`U`matcnt'') ///
						 `rcolf'`thirdrow' = ("Total")
						 						
		}
		else if "`sort'" == "" & `top'==12344 & "`rowtot'" == "" & "`coltot'" != "" {
			qui putexcel A`startrow'  = ("`title'") ///
						 A`secondrow' = ("`: var label `1''") ///
						 B`secondrow' = ("`: var label `2'' and `: var label `3''") ///
						 A`fifthrow'  = matrix(`U`matcnt'') ///
						 A`lastrow'   = ("Total") 
		}
		qui else if "`sort'" != "" | `top'!=12344 {
		* use save/use algorithm instead of matrix operations for sorting
		* add table titles
			qui putexcel A`startrow'  = ("`title'") ///
						 A`secondrow' = ("`: var label `1''") ///
						 B`secondrow' = ("`: var label `2'' and `: var label `3''") ///
						 `rcolf'`thirdrow' = ("Total") 
			if "`coltot'" != "" qui putexcel A`lastrow' = ("Total")		
			if `top'!=12344 & "`sort'" == "" local sort = "h" 
			
		* add sorted data with corresponding row labels
			drop _all
			svmat `U`matcnt'', names(varn)
			ren varn1 index
			merge m:m index using "`labels'", keepusing(rowlabel) keep(master matched) nogen
			order rowlabel, first
			drop index
		* case for column totals: store the value, apply sorting, restore the totals value
			sum varn`cols' if rowlabel ==.
			if "`sort'" == "h" {
				replace varn`cols' = -1 if rowlabel ==. 
				gsort -varn`cols'
				replace varn`cols' = r(mean) if rowlabel ==. 
				replace rowlabel = -1 if rowlabel ==. 
				label define Total99 -1 "Total"
				label values rowlabel Total99
			}
			else if "`sort'" == "l" {
				replace varn`cols' = 100001 if rowlabel ==. 
				gsort varn`cols'
				replace varn`cols' = r(mean) if rowlabel ==. 
				replace rowlabel = 100001 if rowlabel ==. 
				label define Total99 100001 "Total"
				label values rowlabel Total99
			}
		* select top N cases
			if `top'!=12344  keep if _n<=`top' | rowlabel ==-1 | rowlabel == 100001
		* save sorted matrix with labels into Excel
			export excel using `"`using'"', sheet("`sheet'") cell(A`fifthrow') sheetmodify
			label drop Total99
			restore, preserve
		}
		
		* export label matrices to Excel
		mata:	b = xl()
		mata:	b.load_book("`using'")
		mata:	b.set_sheet("`sheet'")
		if "`sort'" == "" {
			mata:	b.put_string(row5, 1, rowl)			//overwrites index values in the first column
		}
		mata:	b.put_string(row4, 2, coll)
		mata:	b.put_string(row3, 2, scoll)
		mata:	b.close_book()

		
		
	*~~~~~~~~~~~~~~~ format ~~~~~~~~~~~~~~~
	* --> Excel formatting; takes ~90% of computational time
		if "`format'" != "" {
			* --> check entered options
			* for more than one options
			if `: word count `format'' > 2 { 
				di as err "Please select one or two format options: b(asic) and/or m(ergecells)"
				exit
			} 
			* for not listed options
			local formatlist = "b ba bas basic m me merge mergecells" 
			if !`: list format in formatlist' { 
				di as err "Format option (`format') is unavailable. Please select a valid format option(s): b(asic) and/or m(ergecells)"
				exit
			}
			* check what totals to prepare
			local formatlist1 = "b ba bas basic" 
			local formatlist2 = "m me merge mergecells"
			if !`: list format in formatlist1' { 
				local format_merge = "yes"
			}		
			if !`: list format in formatlist2' { 
				local format_basic = "yes"
			}

	*~~~~~~~~~~~~~~~ format(basic) ~~~~~~~~~~~~~~~
	* --> merge table titles, "total" cell, draw borders; takes ~20% of formatting step		
			if "`format_basic'" == "yes" {
			* merge table titles
				qui putexcel A`secondrow':A`fourthrow' , merge hcenter txtwrap
				qui putexcel B`secondrow':`rcolf'`secondrow', merge hcenter txtwrap
			* merge "total" cell
				if "`sort'" != "" | "`rowtot'" != "" {
				qui putexcel `rcolf'`thirdrow':`rcolf'`fourthrow', merge hcenter txtwrap
				}
			* draw borders
				qui putexcel B`fourthrow':`rcolf'`fourthrow', hcenter
				qui putexcel A`secondrow':`rcolf'`lastrow'  , border(all) vcenter 
			}
			
			
	*~~~~~~~~~~~~~~~ format(mergecells) ~~~~~~~~~~~~~~~
	* --> merge super column headers; takes ~80% of the step
			if "`format_merge'" == "yes" {	
				local colsn = `cols' - 1
				if "`sort'" != "" | "`rowtot'" != "" local colsn = `colsn' - 1
				
				forvalues i = 1(`=scalar(``3'_cnt')')`colsn' {
					local col1 = `i'+``3'_cnt'
					  mata:st_local("rcol1", numtobase26(`col1'))
					local col2 = `col1' - ``3'_cnt' + 1
					  mata:st_local("rcol2", numtobase26(`col2'))
					//di "`rcol2'`thirdrow' `rcol1'`thirdrow'"
					qui putexcel `rcol2'`thirdrow':`rcol1'`thirdrow' ,  merge hcenter
				}
			}
		//end of format option
		}
	//end of foreach loop
	}
	putexcel clear

	
	*~~~~~~~~~~~~~~~ show ~~~~~~~~~~~~~~~; ~0.2 sec
	* --> Show Stata's tabulation for three cases: a) one three-way table; 
	* b) a row of three-way tables where a fourth varialbe is numeric; 
	* c) a row of three-way tables where a fourth varialbe is string
	if "`show'" != "" {
		if `four' == -1 {
			di in yellow "Built-in view: table `1' `3' `2', c(freq)"
			cap nois table `1' `3' `2'
		}
		else if `four' == 0 {
			foreach y of local list`4' {
			di in yellow "Built-in view: table `1' `3' `2' if `4' == `y', c(freq) " 	//use label = (`: var label `4'')   
			cap nois table `1' `3' `2' if `4'==`y'
			}
		}
		else if `four' == 1 {
			foreach y of local list`4' {
			di in yellow "Built-in view: table `1' `3' `2' if `4' == " `"""' "`y'" `"""' ", c(freq)"
			cap nois table `1' `3' `2' if `4'=="`y'"
			}
		}
	}
	
	*~~~~~~~~~~~~~~~ stub ~~~~~~~~~~~~~~~
	* --> Save matrices into Stata with a user-defined name
	if "`stub'" != "" {
		forval i = 1/`matcnt'  {
			local matn = "`stub'`i'"
			mat `matn' = `U`i''
			local matlist2 "`matlist2' `matn'"
		}
		di ""
		di in yellow "Output tables stored in matrices:`matlist2'"
	}
	* --> as rclass
	forvalues i = 1/`matcnt' {
		return matrix tw`i' `U`i''
		local matlist2 "`matlist2' r(tw`i')"
	}
	
	*~~~~~~~~~~~~~~~ output ~~~~~~~~~~~~~~~
	* --> click to open the file
	di in yellow "Output written to {browse `using'}"
	
	*~~~~~~~~~~~~~~~ clear ~~~~~~~~~~~~~~~
	* --> clear mata
	mata: mata clear
	
end