*! version 1.3.1 Matthew White 21mar2014
program bcstats, rclass
	* Compare survey data with back check data and report error rates.
	*
	* Required options:
	*   surveydata(), bcdata()  filenames of the survey and back check datasets
	*   id()                    variable(s) that uniquely identify observations
	*                           in both datasets
	* At least one of t1vars(), t2vars(), t3vars() must name the variables to
	* compare. The remaining options control the enumerator checks, the
	* stability checks (ttest, signrank), the saved comparisons dataset, and
	* how string values are normalized before comparison.
	*
	* Side effects: writes the comparisons dataset to filename() (default
	* bc_diffs.csv or bc_diffs.dta) and displays error rate tables.
	*
	* Returned results: scalars showid1/showid2 and the matrices assembled in
	* the ret* locals below (enum#, enumteam#, var#, backchecker#, bcteam#,
	* ttest#, signrank#), depending on the options specified.
	vers 9

	#d ;
	syntax, Surveydata(str) Bcdata(str) id(passthru)
		/* comparison variables */
		[t1vars(passthru) t2vars(passthru) t3vars(passthru)]
		/* enumerator checks */
		[ENUMerator(passthru) BACKchecker(passthru)
		ENUMTeam(passthru) BCTeam(passthru) SHowid(str) showall]
		/* stability checks */
		[ttest(passthru) Level(real -1) signrank(passthru)]
		/* comparisons dataset */
		[KEEPSUrvey(passthru) keepbc(passthru) full NOLabel
		FILEname(str) replace dta]
		/* string comparison */
		[LOwer UPper NOSymbol TRim]
		/* other options */
		[okrate(real 0.1) okrange(str) nodiff(str asis) exclude(str asis)]
	;
	#d cr

	***check syntax***

	* Parse -okrange()-.
	parse_okrange `okrange'
	loc rangevars		"`s(varlist)'"
	loc okrange_perc	`s(perc)'
	loc okrange_min		`s(min)'
	loc okrange_max		`s(max)'

	* The data in memory is replaced several times below; restore it on exit.
	preserve

	* Unabbreviate and expand varlists.
	* parse_opt_varlists overwrites the option locals (id, t1vars, ...) and
	* rangevars in this program with the expanded variable lists.
	#d ;
	parse_opt_varlists,
		surveydata(`"`surveydata'"') bcdata(`"`bcdata'"') rangevars(`rangevars')
		`id' `t1vars' `t2vars' `t3vars' `ttest' `signrank'
		`enumerator' `enumteam' `keepsurvey'
		`backchecker' `bcteam' `keepbc'
		varname(enumerator enumteam backchecker bcteam)
		numeric(enumerator enumteam backchecker bcteam ttest signrank)
	;
	#d cr

	* Check the comparison variables.
	loc tvars `t1vars' `t2vars' `t3vars'
	if !`:list sizeof tvars' {
		* Using -icd9- as a template.
		di as err "must specify one of options t1vars(), t2vars(), or t3vars()"
		ex 198
	}

	* Finish processing -okrange()-.

	foreach var of loc rangevars {
		if !`:list var in tvars' {
			di as err "option okrange(): " ///
				"`var' not type 1, type 2, or type 3 variable"
			ex 198
		}
	}

	* Store each range in locals named after the variable:
	* varname + perc, varname + min, varname + max.
	forv i = 1/`:list sizeof rangevars' {
		loc var :			word `i' of `rangevars'
		loc `var'perc :		word `i' of `okrange_perc'
		loc `var'min :		word `i' of `okrange_min'
		loc `var'max :		word `i' of `okrange_max'
	}

	* enumerator checks
	foreach option in enumerator backchecker enumteam bcteam showid {
		if "``option''" != "" & "`t1vars'`t2vars'" == "" {
			di as err "option `option' must be specified with option t1vars or t2vars"
			ex 198
		}
	}

	* Parse -showid()-.
	if !`:length loc showid' ///
		loc showid 30%
	parse_showid `showid'
	loc showid_val  `s(val)'
	loc showid_perc `s(perc)'

	* stability checks
	foreach option in ttest signrank {
		if "``option''" != "" & "`t2vars'`t3vars'" == "" {
			di as err "option `option' must be specified with option t2vars or t3vars"
			ex 198
		}
	}

	* Parse -filename()-.
	loc ext = cond("`dta'" == "", ".csv", ".dta")
	if !`:length loc filename' ///
		loc filename bc_diffs`ext'
	else {
		* Add a file extension to `filename' if necessary.
		mata: if (pathsuffix(st_local("filename")) == "") ///
			st_local("filename", st_local("filename") + st_local("ext"));;
	}

	* Check -filename()- and -replace-.
	* Return code 602 means the file already exists, which is acceptable only
	* when -replace- was specified.
	cap conf new f `"`filename'"'
	if ("`replace'" == "" & _rc) | ("`replace'" != "" & !inlist(_rc, 0, 602)) {
		conf new f `"`filename'"'
		ex `=_rc'
	}

	* okrate, showall
	loc showall = "`showall'" != ""
	if !`showall' {
		if !inrange(`okrate', 0, 1) {
			di as err "okrate must be between 0 and 1"
			ex 198
		}
	}
	* -2, NOT -1: see subprogram errorrate
	else loc okrate -2

	* nodiff
	* Expected form: comma-separated pairs of a numeric value and a string
	* value. The values are collected into nodiffnum and nodiffstr.
	loc ndwc : word count `nodiff'
	if `ndwc' {
		loc nocommas : subinstr loc nodiff "," "", all
		loc ncwc : word count `nocommas'
		if `ncwc' > 18 {
			di as err "option nodiff: too many values"
			ex 130
		}
		tempvar test
		loc first 1
		while `ndwc' {
			gettoken vals  nodiff : nodiff, parse(",")
			gettoken comma nodiff : nodiff, parse(",")

			loc nvals : word count `vals'
			if `nvals' != 2 {
				di as err "invalid option nodiff"
				ex 198
			}

			* Validate each value by trying to generate a variable from it.
			gettoken num str : vals, quotes
			cap gen `test' = `num'
			cap confirm numeric v `test'
			if _rc {
				di as err "option nodiff: invalid numeric value"
				ex 198
			}
			drop `test'
			loc nodiffnum `nodiffnum'`=cond("`nodiffnum'" == "", "", ", ")'`num'

			cap gen `test' = `str'
			cap confirm str v `test'
			if _rc {
				di as err "option nodiff: invalid string value"
				ex 198
			}
			drop `test'
			loc str `str'
			cap loc str = `"`"`str'"'"'
			if _rc {
				di as err "option nodiff: string value not enclosable by compound double quotes"
				ex 198
			}
			loc nodiffstr `"`nodiffstr'`=cond(`"`nodiffstr'"' == "", "", ", ")'`str'"'
			loc ndwc : word count `nodiff'
		}
	}

	* exclude
	* Expected form: one numeric value followed by one string value.
	loc exclwc : word count `exclude'
	if `exclwc' {
		if `exclwc' > 2 {
			di as err "option exclude: too many values"
			ex 130
		}

		gettoken exclnum exclstr : exclude
		tempvar test
		cap gen `test' = `exclnum'
		cap confirm numeric v `test'
		if _rc {
			di as err "option exclude: invalid numeric value"
			ex 198
		}
		drop `test'

		cap gen `test' = `exclstr'
		cap confirm str v `test'
		if _rc {
			di as err "option exclude: invalid string value"
			ex 198
		}
		drop `test'
		loc exclstr `exclstr'
		cap loc exclstr = `"`"`exclstr'"'"'
		if _rc {
			di as err "option exclude: string value not enclosable by compound double quotes"
			ex 198
		}
	}

	* ttest, level
	* -1 is the sentinel default meaning that level() was not specified.
	if `level' == -1 loc level = c(level)
	else if "`ttest'" == "" {
		di as err "option level must be specified with option ttest"
		ex 198
	}
	if !inrange(`level', 10, 99.99) {
		di as err "level() must be between 10 and 99.99 inclusive"
		ex 198
	}

	* -lower- and -upper-
	if "`lower'" != "" & "`upper'" != "" {
		di as err "options lower and upper are mutually exclusive"
		ex 198
	}

	* duplicate variable specification
	* across options
	#d ;
	loc alloptions "
		"id t1vars t2vars t3vars enumerator enumteam backchecker bcteam"
		"id enumerator enumteam backchecker bcteam keepsurvey"
		"id backchecker bcteam keepbc"
	";
	#d cr
	* Within each group of options, no variable may appear in two options.
	foreach options of loc alloptions {
		loc nopts : word count `options'
		forv i = 1/`=`nopts' - 1' {
			loc option1 : word `i' of `options'
			forv j = `=`i' + 1'/`nopts' {
				loc option2 : word `j' of `options'
				loc shared : list `option1' & `option2'
				if `:list sizeof shared' {
					gettoken first : shared
					di as err "variable `first' specified in " ///
						"options `option1'() and `option2'()"
					ex 198
				}
			}
		}
	}

	* reserved variable names
	loc allvars `id' `tvars' `enumerator' `backchecker' `enumteam' `bcteam' `keepsurvey' `keepbc'
	foreach reserved in type variable survey back_check differences total error_rate {
		if `:list reserved in allvars' {
			di as err "`reserved' is a reserved variable name"
			ex 198
		}
	}

	* ttest, signrank
	foreach option in ttest signrank {
		loc not23 : list `option' - t2vars
		loc not23 : list not23 - t3vars
		if "`not23'" != "" {
			di as err "option `option': `:word 1 of `not23'' not type 2 or type 3 variable"
			ex 198
		}
	}

	* Load, check, and normalize each dataset, then save it to a tempfile
	* (locals survey and bc hold the tempfile names).
	loc surveyname survey
	loc bcname back check
	* "advars" suffix for "administrator variables"
	loc surveyadvars `enumerator' `enumteam'
	loc bcadvars `backchecker' `bcteam'
	foreach data in survey bc {
		use `"``data'data'"'

		* number of observations
		if !_N {
			di as err "no observations in ``data'name' data"
			ex 2000
		}

		* isid
		cap isid `id'
		if _rc {
			loc nvars : word count `id'
			di as err "`=plural(`nvars', "variable")' `id' `=plural(`nvars', "does", "do")' not uniquely identify observations in ``data'name' data"
			ex 459
		}

		* bc_ prefix
		cap ds bc_*
		if !_rc {
			di as err "variable `:word 1 of `r(varlist)'' has illegal prefix bc_ in ``data'name' data"
			ex 198
		}

		* enclosable by compound quotes
		tempvar noenclose
		qui ds `id' `tvars', has(type string)
		foreach var in `r(varlist)' {
			egen `noenclose' = total(strpos(`var', "`") | strpos(`var', `"""' + "'"))
			if `noenclose'[1] {
				di as err "`var' contains `" `" or ""' "' in ``data'name' data"
				ex 198
			}
			drop `noenclose'
		}

		keep `id' `tvars' ``data'advars' `keep`data''

		* save formats
		foreach var of loc keep`data' {
			loc keep`data'f `keep`data'f' `:format `var''
		}

		* modify strings
		if "`lower'`upper'`nosymbol'`trim'" != "" {
			qui ds `tvars', has(type string)
			foreach var in `r(varlist)' {
				if "`lower'`upper'" != "" qui replace `var' = `lower'`upper'(`var')
				if "`nosymbol'" != "" {
					foreach symbol in . , ! ? ' / ; : ( ) ` ~ @ # $ % ^ & * - _ = + [ ] { } \ | < > {
						qui replace `var' = subinstr(`var', "`symbol'", " ", .)
					}
					qui replace `var' = subinstr(`var', `"""', " ", .)
				}
				if "`trim'" != "" qui replace `var' = trim(itrim(`var'))
			}
		}

		* rename variables in back check data
		if "`data'" == "bc" {
			* variable name length
			foreach var of loc tvars {
				cap confirm new v bc_`var'
				if _rc {
					di as err "variable name '`var'' exceeds 29 characters"
					ex 198
				}
			}

			* rename
			foreach var of loc tvars {
				ren `var' bc_`var'
			}
			foreach var of loc keepbc {
				cap confirm v bc_`var'
				if _rc ren `var' bc_`var'
				loc bckeepbc `bckeepbc' bc_`var'
			}
		}

		* save modified data set
		sort `id'
		tempfile `data'
		qui save ``data''
	}

	***end***

	***produce data set***
	* merge
	* The back check data is in memory (master); the survey data is the using
	* dataset, so _merge == 1 marks IDs found only in the back check data.
	qui merge `id' using `survey'

	* ids in back check but not survey data
	qui count if _merge == 3
	if !r(N) {
		di as err "there are no shared IDs between survey and back check data"
		ex 2000
	}
	qui count if _merge == 1
	if r(N) {
		di "{txt}note: the following ids appear in the back check data but not the survey data and will be dropped."
		sort `id'
		l `id' if _merge == 1, noo
	}
	qui drop if _merge != 3

	* attach survey value labels to back check tvars if not labeled and vice versa
	foreach var of loc tvars {
		loc surveylab : val la `var'
		loc bclab : val la bc_`var'
		if "`bclab'" == "" & "`surveylab'" != "" la val bc_`var' `surveylab'
		else if "`surveylab'" == "" & "`bclab'" != "" la val `var' `bclab'
	}

	* create postfile
	* One posted record per (observation, comparison variable) pair.
	foreach var of loc id {
		loc idpost `idpost' `:type `var'' `var'
	}
	tempname pf
	tempfile byobs
	postfile `pf' `idpost' type str32 variable str244 survey str244 back_check diff using `byobs'

	* post
	sort `enumerator' `id'
	tempvar decvar bcdecvar
	foreach var of loc tvars {
		* use value label instead of number for variables survey and back_check
		loc uselab = "`:val la `var''" != "" & "`nolabel'" == ""
		if `uselab' {
			qui dec `var', gen(`decvar')
			qui dec bc_`var', gen(`bcdecvar')
		}

		* determine type/format of `var'
		cap confirm numeric v `var'
		loc type = cond(_rc, "str", "num")
		loc format : format `var'

		* determine whether `var' is type 1, 2, or 3
		loc ttype : list var in t1vars
		if !`ttype' {
			loc ttype : list var in t2vars
			if `ttype' loc ttype 2
			else loc ttype 3
		}

		* loop through observations
		forv i = 1/`=_N' {
			* option exclude
			if "`exclnum'" == "" loc post 1
			else loc post = bc_`var'[`i'] != `excl`type''
			if `post' {
				* prepare id values for post
				loc idpost
				foreach idvar of loc id {
					loc idval = `idvar'[`i']
					cap confirm str v `idvar'
					if !_rc loc idval `"`"`idval'"'"'
					loc idpost `idpost' (`idval')
				}

				* prepare survey and back check values for post
				if `uselab' {
					if mi(`decvar'[`i']) loc val = string(`var'[`i'], "`format'")
					else loc val = `decvar'[`i']

					if mi(`bcdecvar'[`i']) loc bcval = string(bc_`var'[`i'], "`format'")
					else loc bcval = `bcdecvar'[`i']
				}
				else {
					if "`type'" == "num" {
						loc val = string(`var'[`i'], "`format'")
						loc bcval = string(bc_`var'[`i'], "`format'")
					}
					else {
						loc val = `var'[`i']
						loc bcval = bc_`var'[`i']
					}
				}

				* prepare diff for post
				loc diff = `var'[`i'] != bc_`var'[`i']
				if `diff' {
					* option okrange
					if `:list var in rangevars' {
						* Relative range: the percentage offsets are multiplied by
						* the sign of the survey value so that the lower bound
						* stays below the upper bound when the survey value is
						* negative. Without this, every differing back check
						* value of a negative survey value counted as a
						* difference. Results for survey values that are zero,
						* positive, or missing are unchanged.
						* NOTE(review): a back check value exactly on a range
						* endpoint still counts as a difference, as before;
						* confirm that exclusive endpoints are intended.
						if ``var'perc' loc diff = bc_`var'[`i'] <= (1 + sign(`var'[`i']) * ``var'min' / 100) * `var'[`i'] | ///
							bc_`var'[`i'] >= (1 + sign(`var'[`i']) * ``var'max' / 100) * `var'[`i']
						else loc diff = bc_`var'[`i'] <= `var'[`i'] + ``var'min' | bc_`var'[`i'] >= `var'[`i'] + ``var'max'
					}
					else loc diff 1

					* option nodiff
					if `diff' & "`nodiffnum'" != "" loc diff = !inlist(bc_`var'[`i'], `nodiff`type'')
				}

				* post
				post `pf' `idpost' (`ttype') ("`var'") (`"`val'"') (`"`bcval'"') (`diff')
			}
		}
		if `uselab' drop `decvar' `bcdecvar'
	}

	* close post file
	postclose `pf'

	* add value labels/formats to id; add administrator and "keep" variables; add variable labels
	* The tempvar n records the posted order so it can be restored after the
	* merges, which require sorting by id.
	use `byobs', clear
	tempvar n
	gen `n' = _n
	sort `id'
	qui save `byobs', replace
	use `survey', clear
	sort `id'
	qui merge `id' using `byobs'
	qui drop if _merge != 3
	drop _merge
	sort `id'
	qui merge `id' using `bc'
	qui drop if _merge != 3
	drop _merge
	sort `n'
	drop `n'
	order `id' `enumerator' `enumteam' `backchecker' `bcteam' type variable survey back_check diff `keepsurvey' `bckeepbc'
	if "`nolabel'" != "" {
		if `:list sizeof keepsurvey' | `:list sizeof bckeepbc' {
			qui ds `keepsurvey' `bckeepbc', has(t numeric)
			if "`r(varlist)'" != "" ///
				la val `r(varlist)'
		}
	}
	la var type "Variable type"
	* Use the label name vartype unless it is already defined, in which case
	* keep drawing temporary names until an unused one is found.
	cap la l vartype
	if _rc loc label vartype
	else {
		tempname label
		cap la l `label'
		while !_rc {
			tempname label
			cap la l `label'
		}
	}
	la def `label' 1 "type 1" 2 "type 2" 3 "type 3"
	la val type `label'
	la var variable "Variable"
	la var survey "Value in survey data"
	la var back_check "Value in back check data"
	la var diff "Difference between survey and back check"

	qui save `byobs', replace

	drop `:list tvars - keepsurvey'
	foreach var in `:list tvars - keepbc' {
		drop bc_`var'
	}

	* option full
	if "`full'" == "" {
		qui keep if diff
		drop diff
	}

	* save as .csv/.dta
	* After writing the .csv, read it back: a missing value in the first row
	* sets csvwarn, which triggers the misalignment note at the end.
	loc csvwarn 0
	if "`dta'" == "" {
		qui outsheet using `"`filename'"', c `replace'
		qui insheet  using `"`filename'"', c non clear
		qui ds
		foreach var in `r(varlist)' {
			if mi(`var'[1]) {
				loc csvwarn 1
				continue, break
			}
		}
	}
	else {
		qui compress
		qui save `"`filename'"', `replace'
	}
	***end***

	***display stats***
	use `byobs', clear

	* enumerator checks
	* Each ret* local accumulates quoted return commands that are executed
	* at the end of the program.
	forv type = 1/2 {
		if "`t`type'vars'" != "" {
			di _n "{txt}Completing {res:enumerator} checks for type {res:`type'} variables..."

			* enumerators with high error rates
			if "`enumerator'" != "" {
				if !`showall' loc message Displaying enumerators with error rates above {res:`=100 * `okrate''%}...
				else loc message Displaying enumerator error rates...
				errorrate, type(`type') by1(`enumerator') by1name(enumerator) message("`message'") okrate(`okrate') keep
				loc varbyenum = r(high)
				tempname enum`type'
				mat `enum`type'' = r(rates)
				loc retenum `"`retenum' "ret mat enum`type' = `enum`type''""'

				tempvar highenum
				qui gen `highenum' = error_rate > `okrate' & !mi(error_rate)
				drop differences total error_rate
			}
			else loc varbyenum 0

			* enumerator team error rates
			if "`enumteam'" != "" {
				errorrate, type(`type') by1(`enumteam') by1name(enum team) message("Displaying enumerator team error rates...")
				tempname enumteam`type'
				mat `enumteam`type'' = r(rates)
				loc retenumteam `"`retenumteam' "ret mat enumteam`type' = `enumteam`type''""'
			}

			* variable error rates
			if `type' == 1 & !`showall' loc message Displaying variables with error rates above {res:`=100 * `okrate''%}...
			else loc message Displaying variable error rates...
			errorrate, type(`type') by1(variable) message("`message'") okrate(`=cond(`type' == 1, `okrate', -1)') strictreturn
			if `type' == 1 {
				qui errorrate, type(1) by1(variable) strictreturn
				tempname var1
				mat `var1' = r(rates)
				loc retvar `""ret mat var1 = `var1'""'
			}
			else {
				tempname var2
				mat `var2' = r(rates)
				loc retvar `"`retvar' "ret mat var2 = `var2'""'
			}

			* variables with high error rates for enumerators with high error rates
			if `varbyenum' {
				if !`showall' loc message Displaying variables with high error rates for enumerators with high error rates...
				else loc message Displaying variable error rates by enumerator...
				errorrate if `highenum', type(`type') by1(`enumerator') by2(variable) message("`message'") okrate(`okrate')
			}

			* back checker error rates
			if "`backchecker'" != "" {
				errorrate, type(`type') by1(`backchecker') by1name("back checker") message("Displaying back checker error rates...")
				tempname backchecker`type'
				mat `backchecker`type'' = r(rates)
				loc retbackchecker `"`retbackchecker' "ret mat backchecker`type' = `backchecker`type''""'
			}

			* back checker team error rates
			if "`bcteam'" != "" {
				errorrate, type(`type') by1(`bcteam') by1name("bc team") message("Displaying back checker team error rates...")
				tempname bcteam`type'
				mat `bcteam`type'' = r(rates)
				loc retbcteam `"`retbcteam' "ret mat bcteam`type' = `bcteam`type''""'
			}

			* back checks with high error rates (option showid)
			if `showid_perc' {
				loc if error_rate >= `showid_val' / 100
				loc message Displaying back checks with error rates of at least {res:`showid'}...
			}
			else {
				loc if differences >= `showid_val'
				loc message Displaying back checks with at least {res:`showid_val'} `=plural(`showid_val', "difference")'...
			}
			errorrate if `if', type(`type') by1(`id') message("`message'") keep
			qui count if `if' & type == `type'
			loc retshowid `"`retshowid' "return scalar showid`type' = `=r(N) != 0'""'
			drop differences total error_rate
		}
	}

	* stability checks
	foreach type in 2 3 {
		loc ttestvars    : list t`type'vars & ttest
		loc signrankvars : list t`type'vars & signrank
		if (`type' == 2 & "`ttestvars'`signrankvars'" != "") | (`type' == 3 & "`t`type'vars'" != "") {
			di _n "{txt}Completing {res:stability} checks for type {res:`type'} variables..."

			* type 3 variables: variable error rates
			if `type' == 3 {
				errorrate, type(`type') by1(variable) message("Displaying variable error rates...") strictreturn
				tempname var3
				mat `var3' = r(rates)
				loc retvar `"`retvar' "ret mat var3 = `var3'""'
			}

			* ttest and signrank
			loc tteststats N_1 N_2 p_l p_u p se t sd_1 sd_2 mu_1 mu_2 df_t
			loc signrankstats N_neg N_pos N_tie sum_pos sum_neg z Var_a
			foreach test in ttest signrank {
				* statsrow is a comma-separated list of r() references that is
				* evaluated into a matrix row after each test runs.
				loc statsrow
				foreach stat of loc `test'stats {
					loc statsrow `statsrow'`=cond("`statsrow'" == "", "", ", ")'r(`stat')
				}

				tempname statsmat
				foreach var of loc `test'vars {
					qui count if variable == "`var'" & !mi(`var', bc_`var')
					if r(N) {
						di _n "{txt}{cmd:`test'} for {res:`var'}:"
						if "`test'" == "ttest" ttest `var' == bc_`var' if variable == "`var'", level(`level')
						else signrank `var' = bc_`var' if variable == "`var'"
						loc row `statsrow'
					}
					else {
						di _n "{txt}no observations for {res:`var'}; skipping {cmd:`test'}"
						loc row
						forv i = 1/`:word count `statsrow'' {
							loc row `row'`=cond("`row'" == "", "", ", ")'.
						}
					}
					cap confirm mat `statsmat'
					mat `statsmat' = `=cond(_rc, "", "`statsmat' \ ")'(`row')
				}

				if "``test'vars'" != "" {
					mat rown `statsmat' = ``test'vars'
					mat coln `statsmat' = ``test'stats'
					loc ret`test' `"`ret`test'' "ret mat `test'`type' = `statsmat'""'
				}
			}
		}
	}
	***end***

	if `csvwarn' di _n "{txt}note: the comparisons .csv contains commas and is misaligned."

	* Execute the accumulated return commands, last stored first.
	`:word 2 of `retshowid''
	`:word 1 of `retshowid''
	foreach ret in signrank ttest var bcteam enumteam backchecker enum {
		forv i = `:word count `ret`ret'''(-1)1 {
			`:word `i' of `ret`ret'''
		}
	}
end


/* -------------------------------------------------------------------------- */
					/* parsing programs		*/

program parse_okrange, sclass
	* Parse the contents of option okrange(): a comma-separated sequence of
	* entries of the form  varname [min, max]  where both endpoints are
	* either absolute numbers or percentages (trailing %).
	*
	* Returns in s():
	*   s(varlist)  the variable names, each enclosed in compound quotes
	*   s(perc)     one 0/1 flag per entry: 1 if the range is relative
	*   s(min)      the range minimums, percent signs removed
	*   s(max)      the range maximums, percent signs removed
	* Exits with an error if an entry is malformed.
	while `:length local 0' {
		* With the comma as the only parse character, one entry arrives as
		* four tokens: the name plus opening bracket and minimum, a comma,
		* the maximum plus closing bracket, and the comma before the next
		* entry (empty after the last entry).
		gettoken head 0 : 0, parse(",")
		gettoken sep1 0 : 0, parse(",")
		gettoken hi   0 : 0, parse(",")
		gettoken sep2 0 : 0, parse(",")

		if !("`sep1'" == "," & inlist("`sep2'", ",", "")) {
			display as error "option okrange() invalid"
			exit 198
		}

		* Split the variable name from the minimum.
		gettoken name lo : head
		if `:list sizeof name' > 1 {
			display as error "option okrange(): `name': too many variables specified"
			exit 103
		}
		local names "`names' `"`name'"'"

		* Trim both endpoints, then require and strip the brackets.
		local lo : list retok lo
		local hi : list retok hi
		mata: st_local("hitail", substr(st_local("hi"), -1, 1))
		if substr("`lo'", 1, 1) != "[" | "`hitail'" != "]" {
			display as error "option okrange() invalid"
			exit 198
		}
		local lo : subinstr local lo "[" ""
		mata: st_local("hi", ///
			substr(st_local("hi"), 1, strlen(st_local("hi")) - 1))
		local lo : list retok lo
		local hi : list retok hi

		* The token count is checked only now, after the brackets are gone,
		* because a range written with spaces inside the brackets is
		* permitted even though it splits into more tokens.
		if `:list sizeof lo' > 1 | `:list sizeof hi' > 1 {
			display as error "option okrange() invalid"
			exit 198
		}

		* Detect and strip a trailing percent sign on each endpoint.
		foreach bound in lo hi {
			mata: st_local("`bound'perc", ///
				strofreal(substr(st_local("`bound'"), -1, 1) == "%"))
			if ``bound'perc' {
				mata: st_local("`bound'", substr(st_local("`bound'"), 1, ///
					strlen(st_local("`bound'")) - 1))
			}
		}
		* Exactly one relative endpoint is a mixed range.
		if `loperc' != `hiperc' {
			display as error "option okrange(): range endpoints must be " ///
				"both absolute or both relative"
			exit 198
		}
		local percs `percs' `loperc'

		capture confirm number `lo'
		if _rc {
			display as error "option okrange(): invalid minimum"
			exit 198
		}

		capture confirm number `hi'
		if _rc {
			display as error "option okrange(): invalid maximum"
			exit 198
		}

		if `lo' > `hi' {
			display as error "option okrange(): range minimum greater than maximum"
			exit 198
		}

		if `lo' > 0 | `hi' < 0 {
			display as error "option okrange(): range does not include 0"
			exit 198
		}

		local los `los' `lo'
		local his `his' `hi'
	}

	sreturn local varlist	"`names'"
	sreturn local perc		`percs'
	sreturn local min		`los'
	sreturn local max		`his'
end

program parse_showid, sclass
	* Parse the contents of option showid(): a single token that is either
	* a nonnegative number or a percentage from 0% to 100%.
	*
	* Returns in s():
	*   s(val)   the number, percent sign removed
	*   s(perc)  1 if a trailing percent sign was present, 0 otherwise
	local ntokens : list sizeof 0
	if `ntokens' != 1 {
		display as error "option showid() invalid"
		exit 198
	}

	* A trailing percent sign marks a rate rather than a count.
	mata: st_local("ispct", strofreal(substr(st_local("0"), -1, 1) == "%"))
	if `ispct' {
		mata: st_local("amount", ///
			substr(st_local("0"), 1, strlen(st_local("0")) - 1))
	}
	else {
		local amount : copy local 0
	}

	capture confirm number `amount'
	if _rc {
		display as error "option showid() invalid"
		exit 198
	}

	if `ispct' & !inrange(`amount', 0, 100) {
		display as error "showid rate must be between 0% and 100%"
		exit 198
	}
	if !`ispct' & `amount' < 0 {
		display as error "showid value must be nonnegative"
		exit 198
	}

	sreturn local val  `amount'
	sreturn local perc `ispct'
end

program error_unab_diff
	* Report that a varlist specification resolves to different variable
	* lists in the survey and back check data, then exit with code 198.
	*
	* Arguments: a single token (the offending specification) and opt(),
	* the name of the option it was specified in.
	syntax anything, opt(name)

	* Anything beyond one token is a programming error in the caller.
	gettoken spec extra : anything
	if `:length local extra' {
		error 198
	}

	display as error "option `opt'(): `spec' expands or unabbreviates to " ///
		"different variable lists in survey and back check data"
	exit 198
end

pr parse_opt_varlists
	* Unabbreviate and validate the variable-list options against the survey
	* and back check datasets, then write the expanded lists back to the
	* caller's locals of the same names (via c_local).
	*
	* Options:
	*   surveydata(), bcdata()  dataset filenames
	*   rangevars()             variables named in okrange()
	*   varname()               names of options limited to one variable
	*   numeric()               names of options that require numeric variables
	*   plus one string option per name in the opts list built below
	*
	* Side effect: replaces the data in memory.

	* Options resolved in both datasets, in the survey data only, and in the
	* back check data only.
	loc optsboth	id t1vars t2vars t3vars ttest signrank
	loc optssurvey	enumerator enumteam keepsurvey
	loc optsbc		backchecker bcteam keepbc
	loc opts `optsboth' `optssurvey' `optsbc'

	foreach opt of loc opts {
		loc optssyntax `optssyntax' `opt'(str)
	}
	syntax, surveydata(str) bcdata(str) ///
		[rangevars(str asis) `optssyntax'] ///
		[varname(namelist) numeric(namelist)]

	foreach data in survey bc {
		loc dataname = cond("`data'" == "survey", "survey", "back check") + ///
			" data"

		* Load at most one observation: only the variable names and types
		* are used in this loop.
		loc fn : copy loc `data'data
		qui d using `"`fn'"'
		if r(N) ///
			qui u in 1 using `"`fn'"', clear
		else ///
			qui u `"`fn'"', clear

		* Results go to locals named option + dataset (the expanded varlist)
		* and option + dataset + isnum (one 0/1 flag per variable).
		foreach opt of loc optsboth {
			loc max = cond(`:list opt in varname', "max(1)", "")
			cap noi unab `opt'`data' : ``opt'', min(0) `max' name(`opt'())
			if _rc {
				di as err "in `dataname'"
				ex `=_rc'
			}

			* Sorting because even if ``opt'survey' and ``opt'bc' contain the
			* same variables, they may be in different orders after -unab-.
			foreach var in `:list sort `opt'`data'' {
				cap conf numeric var `var'
				loc `opt'`data'isnum ``opt'`data'isnum' `=!_rc'
			}
		}

		foreach var of loc rangevars {
			* Do not specify -name()-: we are parsing a single varlist, not the
			* entire option. Specifying -name()- would result in error messages
			* that are difficult to interpret.
			cap noi unab unab : `var', max(1)
			if _rc {
				di as err "in `dataname'"
				di as err "option okrange() invalid"
				ex `=_rc'
			}
			loc rangevars`data' `rangevars`data'' `unab'

			cap conf numeric var `var'
			if _rc {
				di as err "okrange():  `var':  string variable not allowed"
				ex 109
			}
		}

		* Options specific to this dataset are unabbreviated in place.
		foreach opt of loc opts`data' {
			loc max = cond(`:list opt in varname', "max(1)", "")
			unab `opt' : ``opt'', min(0) `max' name(`opt'())

			* Re-parse with -syntax- so that a string variable produces the
			* standard error for a numeric varlist.
			if `:list opt in numeric' {
				loc 0 , `opt'(``opt'')
				syntax, [`opt'(varlist num)]
			}
		}
	}

	* Check for differences across the datasets.

	foreach opt of loc optsboth {
		if !`:list `opt'survey === `opt'bc' {
			error_unab_diff "``opt''", opt(`opt')
			/*NOTREACHED*/
		}
		loc `opt' ``opt'survey'

		* The isnum flags were built in sorted variable order, so walk the
		* sorted list to pair each flag with its variable.
		loc sort : list sort `opt'
		forv i = 1/`:list sizeof sort' {
			loc var :			word `i' of `sort'
			loc isnumsurvey :	word `i' of ``opt'surveyisnum'
			loc isnumbc :		word `i' of ``opt'bcisnum'

			if `isnumsurvey' != `isnumbc' {
				di as err "option `opt'(): " ///
					"`var' is numeric in one dataset and string in the other"
				ex 109
			}
		}
	}

	forv i = 1/`:list sizeof rangevars' {
		loc var	:			word `i' of `rangevars'
		loc varsurvey :		word `i' of `rangevarssurvey'
		loc varbc :			word `i' of `rangevarsbc'

		if "`varsurvey'" != "`varbc'" {
			error_unab_diff `var', opt(okrange)
			/*NOTREACHED*/
		}
	}
	loc rangevars `rangevarssurvey'

	* Check numeric varlists.
	* Placing this check here means that the error message does not have to
	* include the dataset name: we have already confirmed that the variable is
	* the same type in both datasets.
	foreach opt of loc optsboth {
		if `:list opt in numeric' {
			loc 0 , `opt'(``opt'')
			syntax, [`opt'(varlist num)]
		}
	}

	* Check for duplicates.

	foreach opt of loc opts {
		loc dups : list dups `opt'
		gettoken first : dups
		if "`first'" != "" {
			di as err "option `opt'(): " ///
				"variable `first' mentioned more than once"
			ex 198
		}
	}

	loc dups : list dups rangevars
	gettoken first : dups
	if "`first'" != "" {
		di as err "option okrange(): multiple ranges specified for `first'"
		ex 198
	}

	* Return parsed options.
	* c_local sets the local of the same name in the calling program.
	foreach opt in `opts' rangevars {
		c_local `opt' "``opt''"
	}
end

					/* parsing programs		*/
/* -------------------------------------------------------------------------- */


/* -------------------------------------------------------------------------- */
					/* -errorrate-			*/

* show table of error rates and save error rates matrix in r(rates)
pr errorrate, rclass
	* Show a table of error rates for comparisons of one variable type and
	* return the rates.
	*
	* Expects the by-observation comparisons data in memory, with variables
	* type and diff; variables differences, total, and error_rate must not
	* already exist.
	*
	* Syntax: errorrate [if], type(#) by1(varlist) [options]
	*   if             restricts the rows shown; it may refer to differences,
	*                  total, and error_rate
	*   type()         the variable type (1, 2, or 3) to summarize
	*   by1()          grouping variable(s). More than one variable is
	*                  allowed, so that multi-variable IDs work; in that case
	*                  r(rates) is not returned and by2() is not allowed
	*   by1name()      column name for by1() in r(rates)
	*   by2()          second grouping variable; one table per level of by1()
	*   message()      text displayed above the table
	*   okrate()       show only groups whose error rate exceeds this value;
	*                  the default -1 shows every group of the given type
	*   keep           leave differences, total, and error_rate in the data
	*   strictreturn   restrict the rows of r(rates) to levels of by1() that
	*                  occur with the given type
	*
	* Returns r(high) (whether any group exceeds okrate(); missing when
	* okrate() is -1) and, when by2() is not specified and by1() is a single
	* variable, the matrix r(rates).

	* Create placeholders so that -syntax- accepts an if-expression that
	* refers to these variables; they are dropped again right after parsing.
	qui gen differences = .
	qui gen total = .
	qui gen error_rate = .
	syntax [if/], type(integer) by1(varlist) [by1name(str) by2(varname) message(str) okrate(real -1) keep strictreturn]
	drop differences total error_rate

	* The by2() tables and the r(rates) matrix are built with -levelsof-,
	* which takes a single variable.
	loc nby1 : list sizeof by1
	if `nby1' > 1 & "`by2'" != "" {
		di as err "option by2() may not be combined with multiple by1() variables"
		ex 198
	}

	qui bys `by1' `by2': egen differences = total(diff & type == `type')
	qui by `by1' `by2': egen total = total(type == `type')
	qui by `by1' `by2': gen error_rate = differences / total

	if `okrate' == -1 {
		return scalar high = .

		qui count `=cond(`:length loc if', "if", "")' `if'
		loc table = r(N) != 0
	}
	else {
		tempvar higherr
		qui gen `higherr' = error_rate > `okrate' & !mi(error_rate)
		qui count if `higherr' `=cond(`:length loc if', "&", "")' `if'
		return scalar high = r(N) != 0

		loc table = return(high)
	}

	if `table' {
		if "`message'" != "" di _n "{txt}`message'"
		format error_rate %9.4f
		gsort -error_rate -total `by1' `by2'

		* display marks the rows eligible for the table; tag picks one row
		* per group so that each group is listed once.
		tempvar display tag
		if "`by2'" == "" {
			if `okrate' == -1 gen `display' = type == `type'
			else gen `display' = `higherr'
			if `:length loc if' qui replace `display' = `display' & `if'
			egen `tag' = tag(`by1') if `display'
			l `by1' error_rate differences total if `display' & `tag', ab(32) noo
		}
		else {
			cap confirm str v `by1'
			loc isstr = !_rc
			if `okrate' == -1 gen `display' = type == `type'
			else gen `display' = `higherr'
			if `:length loc if' qui replace `display' = `display' & `if'
			egen `tag' = tag(`by1' `by2') if `display'
			qui levelsof `by1' if `display', miss
			foreach level in `r(levels)' {
				if `isstr' loc level `"`"`level'"'"'
				l `by1' `by2' error_rate differences total if `display' & `tag' & `by1' == `level', ab(32) noo
			}
		}
	}

	* Build r(rates): one row per level of by1(). For a string by1() the
	* levels become the row names; for a numeric by1() they are stored in the
	* first column.
	if "`by2'" == "" & `nby1' == 1 {
		tempname ratesmat
		cap confirm str v `by1'
		loc isstr = !_rc
		qui levelsof `by1' `=cond("`strictreturn'" == "", "", "if type == `type'")', loc(levels) miss
		foreach level of loc levels {
			if `isstr' loc level `"`"`level'"'"'
			qui su error_rate if `by1' == `level'
			if r(N) loc rate = r(max)
			else loc rate .
			if `isstr' loc row `rate'
			else loc row `level', `rate'
			cap confirm mat `ratesmat'
			mat `ratesmat' = `=cond(_rc, "", "`ratesmat' \ ")'(`row')
		}

		if `isstr' {
			mat coln `ratesmat' = "error rate"
			mat rown `ratesmat' = `levels'
		}
		else mat coln `ratesmat' = "`=cond("`by1name'" == "", "`by1'", "`by1name'")'" "error rate"

		ret mat rates = `ratesmat'
	}

	if "`keep'" == "" drop differences total error_rate
end

					/* -errorrate-			*/
/* -------------------------------------------------------------------------- */