*! version 3.2.1 September 20, 2007 @ 08:35:00
*! runs error checking on a list of variables
program define ckvar , rclass
version 8
	/* ckvar: run the rules stored in variable characteristics over a list of variables.

	   For every variable in the varlist the characteristic <stub>_rule is evaluated
	   through -dochar-.  The result is kept in a marker variable named either by the
	   characteristic <stub>_varname or, by default, <stub>_<variable name> (truncated).
	   A running total over all checked variables is kept in <stub>__total.

	   Options (as used below):
	     key(varlist)   - check for duplicates on these variables via -_ckdupl-
	     markdup(name)  - name of a new variable passed to -_ckdupl- as gen()
	     novars         - skip the per-variable checking entirely
	     droplabels     - drop an existing value label carrying the default label name
	     stub(str)      - prefix used for both the characteristic names and the output
	                      variables; defaults to error/score (characteristics use
	                      "valid" when validating)
	     score / valid  - scoring or validation; validation is the default
	     keepgoing      - continue with the next variable after a failure; also turns
	                      off -preserve-
	     nopreserve     - do not preserve the data before checking
	     progress       - display each variable name as it is checked
	     slow           - obsolete, ignored with a note
	     loud           - passed through to -dochar-
	     brief          - in the error table, list only variables with errors or
	                      counted missing values

	   Returned results set in this program:
	     r(dups)         when key() is specified
	     r(maxScore)     when scoring, updated for each variable whose score is kept
	     r(totalerrors)  when validating
	     r(failreasons)  "var:reason;" entries for variables which could not be checked */

	/* 3.2.1 - changed prefix of output variable to `stub' if stub is non-blank */
	/* 3.2.0 - made ckvar use tmpfile option always (most error checkers are non-complex)
              and added note for deprecated -slow- option
            - fixed up output from validation routine
            - now drop the -total- variable when there are no errors */
	/* 3.1.0 - on advice from Stata folks, made -total- non-optional */
	/* 3.0.4 - made the total into a standard thing w/ standardized name */
	/* 3.0.3 - caught some bugs related to ``slow'' */
	/* 3.0.2 - added the slow option from dochar */
	/* 3.0.1 - changed name from checkvar to ckvar b/c there is already another checkvar */
	/* 3.0.0 - changed to run on validation rules rather than error rules */
	/*         direct error checking can be done ONLY by scoring (if wanted) or by using */
	/*         `error' as a temp variable when writing characteristic code */
	/* 2.2.1 - various bug fixes  */
	/* 2.2.0 - added ability to score a variable (as in grading a test) */
	/*       - scoring: missing values are assumed to have a score of 0 */
	/*       - for weights - scoring multiplies the marker by the weight, errors are still just 0,1,2 etc. */
	/* version 2.1.0 - split out the dochar program as separate */
	/* version 2.0.0 - uses characteristics to keep the error checking attached to variables */
	/* have checked the part which checks for missing values */
	/* have checked using another variable to check for missing values */
	/* have checked use of different missing value tags */
	/* have partially checked the use of labels */
	/*  - fix needed: need to figure smart way to drop existing value labels */

	local myname "ckvar"
	/* NOTE(review): this changes the session's -more- setting and nothing below restores it - confirm this is intended */
	set more 1
	syntax [varlist] [, KEY(varlist) MARKDup(namelist) NOVars DROPLABELS STUB(str) SCore VALid keepgoing nopreserve progress slow loud brief]

	if "`slow'"!="" {
		display as result "The -slow- option is now obsolete and will be ignored."
		}
		
	/* fulllist is 1 only when the varlist is exactly the full dataset varlist (same order) */
	unab allvars : * , min(0)
	local fulllist : list allvars === varlist

	/* choice between valid and score---should be valid alone; left for backward compatibility */
	if "`valid'"!="" {
		if "`score'"!="" {
			display as error "`myname': Please specify either Score or Valid, but not both!"
			exit 198
			}
		}
	else {
		if "`score'"=="" {
			local valid "valid"
			}
		}

	/* output names what is being produced: errors when validating, scores when scoring */
	if "`valid'"=="" {
		local output "score"
		}
	else {
		local output "error"
		}
	
	/* optional duplicate check on the key variables */
	if "`key'"!="" {
		if "`markdup'"=="" {
			_ckdupl `key'
			}
		else {
			confirm new var `markdup'
			_ckdupl `key', gen(`markdup')
			}
		return scalar dups = r(dups)
		}

	/* keepgoing implies nopreserve; the macro is then reused as the _continue */
	/* directive for the table-drawing -display- commands further down */
	if "`keepgoing'"!="" {
		local preserve "nopreserve"
		local keepgoing "_continue"
		}
	
	if "`novars'"=="" {
		if "`preserve'"=="" {
			preserve
			}
		/* everything which can fail runs inside this capture block so the return */
		/* code can be inspected (and a message shown) before exiting */
		capture n {
			if "`stub'"=="" {
				local varstub "`output'"
				local charstub "`output'"
				if "`valid'"!="" {
					local charstub "valid"
					}
				local total `output'__total
				}
			else {
				local varstub "`stub'"
				local charstub "`stub'"
				local total `varstub'__total
				}

			/* double underscore to reduce naming conflict probability */
			capture confirm new var `total'
			if _rc {
				if _rc==110 {
					disp as error "`myname': The variable needed to hold totals: " as result "`total'" as error " already exists!"
					exit 110
					}
				error _rc
				}
			/* allmark accumulates the total; it is a byte for errors, default type for scores */
			tempvar allmark
			if "`output'"=="error" {
				local byte "byte"
				}
			gen `byte' `allmark' = 0
			if "`output'"=="error" {
				local s "s"							 /* s for stupid */
				}
			label var `allmark' "Total `output'`s' across the observations"
			
			/* this makes the tempvars -error- and -valid- if error checking and -score- if scoring */
			tempvar `output' `valid'

			local maxScore 0
			local vcnt 1
			local anychecked 0
			foreach self of local varlist {
				if "`progress'"!="" {
					display as text "Checking `self'..."
					}
				/* name:tempname pairs handed to -dochar- through tempnames() */
				if "`valid'"!="" {
					local tempnames "valid:`valid' error:`error'"
					}
				else {
					local tempnames "score:`score'"
					}
				
				local everythingOK 1
				local misval
				local checked 0
				local failreason

				/* dig through other like variables to find what vars are needed and what other vars are used */
				/*  this should help because it'll pick up required variables anywhere on the like chain */
/* 				dolikedig `self', evalchar(`charstub'_rule) datachar(`charstub'_other_vars_needed) accum */
/* 				local otherVarsNeeded "`r(contents)' `r(visits)'" */
/* 				local otherVarsNeeded: list uniq otherVarsNeeded */
				/* 				local otherVarsNeeded: list otherVarsNeeded - self */

				capture _ckneeded `self', stubs(`charstub') nolikeerror
				if _rc {
					if _rc == 111 {
						display as error "The variable(s) needed to check " as input "`self'" as error " for `output's are: " as input "`r(extras)'"
						local numvar : word count `r(extras)'
						display as error "Please make sure that " plural(`numvar',"this variable is","these variables are") " available!"
						}
					/* record the reason before a possible exit so the closing message can report it */
					local failreason "needed other vars"
					if "`keepgoing'"=="" {
						exit _rc
						}
					display as error "Did not check variable " as result "`self'"
					local everythingOK 0
					} /* end check for other vars */

				/* figure out the name of the score-holding variable */
				/* if scoring an exam, this would be used for question by question analysis - want for all variables checked */
				if `everythingOK' {
					/* find name to hold the scoring/errors for the variable in question */
					/* can have trouble if there are multiple extremely long names */
					/*   which would have the same abbreviations */
					local scoreVar : char `self'[`varstub'_varname]
					if "`scoreVar'"=="" {
						local scoreVar = "`varstub'_" + substr("`self'",1,31-length("`varstub'")-1)
						}
					capture confirm new var `scoreVar'
					if _rc {
						display as error "The variable for holding `output's from `self': " as result "`scoreVar'"
						display as error "  already exists!"
						/* record the reason before a possible exit so the closing message can report it */
						local failreason "`scoreVar' exists"
						if "`keepgoing'"=="" {
							exit _rc
							}
						local everythingOK 0
						}	/* end check of new scoring variable */
					}	/* end check if all ok */

				if `everythingOK' {
					/* run the validation/error check/scoring */
					/* using only the tmpfile option, because 99% of all rules do not use complex checking */
					local tempnames `"`tempnames' self:`self'"'
					capture n dochar `self'[`charstub'_rule], tempnames(`tempnames') tmpfile `loud' quiet
					if _rc {
						display as error "Could not evaluate rule `self'[`charstub'_rule]"
						local failreason "bad rule in `charstub'_rule"
						if "`keepgoing'"=="" {
							exit _rc
							}
						else {
							local everythingOK 0
							}
						}
					}

				if `everythingOK' {
					/* r(havechar) and r(likeVarlist) are read here as left behind by -dochar- */
					local checked = r(havechar) == "yes"
					local anychecked = `anychecked' | `checked'
					/* negate valid values to get errors */
					if "`valid'"!="" & `checked' {
						capture confirm var `error'
						if _rc {
							gen byte `error' = !`valid'
							drop `valid'
							}
						}
				
					/* use the checking from the deepest variable visited */
					local ckVar : word 1 of `r(likeVarlist)'
					if "`ckVar'"=="" {
						local ckVar "`self'"
						}

					/* when scoring, a missing value most likely would be a 0 */
					local req : char `ckVar'[`charstub'_required]

					/* required when the characteristic is 1 or any substring of "true" or "yes" */
					if "`req'"!="" & (("`req'" == "1") | strpos("true",lower("`req'")) | strpos("yes",lower("`req'"))) {
						local misval: char `ckVar'[`charstub'_missing_value]
					
						/* default marker for a missing required value: -1 for errors, 0 for scores */
						if "`misval'"=="" {
							if "`output'"=="error" {
								local misval -1
								}
							else {
								local misval 0
								}
							}

						/* because missing values could have the same errors as others: */
						quietly count if missing(`self')
						local misscount = r(N)
						if `misscount' { 
							/* _rc is non-zero here when the output tempvar already exists */
							capture confirm new variable ``output''
							if _rc {
								quietly replace ``output'' = `misval' if missing(`self')
								}
							else {
								gen byte ``output'' = cond(missing(`self'),`misval',0)
								}
							local checked 1
							}	/* end of check of whether there were any missing values  */
						}		/* end of check for missing values important */
					else {
						/* not required: missing values get 0 whenever the output tempvar exists */
						capture confirm new variable ``output''
						if _rc {
							quietly replace ``output'' = 0 if missing(`self')
							}
						local misscount "N/A"
						} /* end check for separate missing */

					/* check to see if the scoreVar variable is even needed */
					/* --- will be kept if scoring, might be dropped when looking for errors */
			
					if `checked' {
						/* for error checking, drop temp var when all OK */
						if "`output'"=="error" {
							quietly count if ``output'' & (``output'' < .)
							local errcount = r(N)
							if `errcount'==0 {
								drop ``output''
								}
							}
						}	/* end of messages for checked variables */
					else {
						local misscount "N/A"
						local errcount "N/A"
						}

					/* now working with both scores and errors, but only if the variable will be kept */
					capture confirm var ``output''
					if !_rc {
										
						/* weights rescale scores; for errors they only affect the total */
						local theWt : char `ckVar'[`charstub'_wt]
						if "`theWt'"!="" {
							if `theWt'!=1 {
								if "`output'"=="score" {
									quietly replace ``output'' = ``output'' * `theWt'
									}
								else {
									local wtmod "weight of "
									}
								}
							}
						else {
							local theWt 1
							}

						/* value label name: from the characteristic, else the marker variable's name */
						local theLab : char `ckVar'[`charstub'_vlabel_name]
						if `"`theLab'"'=="" {
							local theLab "`scoreVar'"
							if "`droplabels'"!="" {
								capture label drop `theLab'
								}
							}
					
						/* capture in place in case of overwriting another label */
						local theLabVals : char `ckVar'[`charstub'_vlabel]
						if `"`theLabVals'"'!="" {
							capture label define `theLab' `theLabVals'
							if _rc {
								/* rc 110 (label already defined) is tolerated */
								if _rc != 110 {
									display as error "`myname': There was a problem creating the value label " as result "`theLab'" as error "!"
									/* record the reason before a possible exit so the closing message can report it */
									local failreason "value label failed"
									if "`keepgoing'"=="" {
										exit _rc
										}
									local everythingOK 0
									}
								}
							}	/* end check for labelling errors */

						if `everythingOK' {
					
							/* now split off errors and scores, again */
							if "`output'"=="error" {
								/* at least need items for 0, and 1 (and -1 if need be) */
								/* an unlabelled value comes back as the value itself */
								local curlabval : label `theLab' 0
								if `"`curlabval'"' == "0" {
									label define `theLab' 0 "No errors", modify
									}
								if "`misval'"!="" {
									local curlabval : label `theLab' `misval'
									if `"`curlabval'"'=="`misval'" {
										label define `theLab' `misval' "Missing", modify
										}
									}
								local curlabval : label `theLab' 1
								if `"`curlabval'"' == "1" {
									label defin `theLab' 1 "Some error(s)", modify
									}
								}
					
							if ("`output'"=="error") | (`"`theLabVals'"'!="") {
								label values ``output'' `theLab'
								}
							}

						/* turn the tempvar into the permanent marker and add it to the total */
						capture confirm new var `scoreVar'
						if !_rc {
							rename ``output'' `scoreVar'
							local now "$S_DATE at $S_TIME"
							label var `scoreVar' "`output' for `self' generated on `now'"
							if "`total'"!="" {
								if "`output'"=="error" {
									quietly replace `allmark' = `allmark' + `theWt'*(`scoreVar' & !missing(`scoreVar'))
									}
								else {
									quietly replace `allmark' = `allmark' + `theWt'*`scoreVar' if !missing(`scoreVar')
									}
								}
							}

						if "`output'"=="score" {
							local maxScore = `maxScore' + `theWt'
							return scalar maxScore = `maxScore'
							}
						}	/* end check for need for the new variable */
					}	/* end of check that things are OK */

				/* this comes first to have blanket for lack of scoreVar */
				capture confirm var `scoreVar'
				if _rc {
					local scoreVar "none"
					}
				local failreason`vcnt' "`failreason'"
				/* this comes second to be sure that all info is blank in case of error */
				if "`failreason'"!="" {
					local failreasons "`failreasons' `self':`failreason';"
					local scoreVar
					local errcount
					local misscount
					}
				/* numbered copies feed the summary table printed after the loop */
				local errcount`vcnt' "`errcount'"
				local misscount`vcnt' "`misscount'"
				local scoreVar`vcnt' "`scoreVar'"
				local ++vcnt
				}	/* end of loop over varlist */
			if "`total'"!="" {
				rename `allmark' `total'
				local now =trim("$S_DATE at $S_TIME")
				label var `total' "The total `wtmod'`output'`s' found on `now'"
				}

			if "`output'"=="error" {
				display as text _new "Checking $S_FN on `c(current_date)' at `c(current_time)':" _newline
				if !`fulllist' {
					display as text "Checked a partial variable list: " _newline `"`varlist'"' _newline
					}

				/* display a table only if error totals are non-zero */
				quietly sum `total'
				return scalar totalerrors=`r(sum)'
				if `r(sum)' {
					local varwid 14
					local errwid 6
					local miswid 7
					local evarwid 17
					local failwid 23
					/* `keepgoing' holds _continue (or nothing), so the failure column */
					/* is appended to the same output line only when it is wanted */
					display as text "{ralign `varwid':Variable name}" ///
					  " {c |} {ralign `errwid':Errors}" ///
					  " {c |} {ralign `miswid':Missing}" ///
					  " {c |} {ralign `evarwid':Error-marker name}" `keepgoing'
					if "`keepgoing'"!="" {
						display " {c |} {ralign `failwid':Failure reason}"
						}
					display "{hline `varwid'}{c -}{c +}{c -}{hline `errwid'}{c -}{c +}{c -}{hline `miswid'}{c -}{c +}{c -}{hline `evarwid'}" `keepgoing'
					if "`keepgoing'"!="" {
						display "{c -}{c +}{c -}{hline `failwid'}"
						}
					local vcnt 1

					foreach self of local varlist {
						/* row style: result by default, text when no marker was kept, error on failure */
						local showAs "result"
						if "`scoreVar`vcnt''"=="none" {
							local showAs "text"
							}
						if "`failreason`vcnt''"!="" {
							local showAs "error"
							}
						/* with brief, show only rows having a positive error or missing count */
						if `"`brief'"'=="" | (real("`errcount`vcnt''")>0 & real("`errcount`vcnt''")<.) | (real("`misscount`vcnt''")>0 & real("`misscount`vcnt''")<.) {
							display as `showAs' %`varwid's abbrev("`self'",`varwid') ///
							  as text " {c |} " as `showAs' "{ralign `errwid':`errcount`vcnt''}" ///
							  as text " {c |} " as `showAs' "{ralign `miswid':`misscount`vcnt''}" ///
							  as text " {c |} " as `showAs' %`evarwid's abbrev("`scoreVar`vcnt''",`evarwid') `keepgoing'
							if "`keepgoing'"!="" {
								display as text " {c |} " as error "{ralign `failwid':`failreason`vcnt''}"
								}
							}
						local ++vcnt
						}
					if "`brief'"!="" {
						display _new as text "All other variables had no errors or missing values of importance."
						}
					}	/* end of check for any errors */
				else {
					if `anychecked' {
						display as result "There were no errors or missing required values!"
						}
					else {
						display as result "No variables were checked---all rules seem to be missing!"
						}
					drop `total'
					}
				}	/* end of printing for error checking */

			return local failreasons "`failreasons'"
			if "`failreasons'"!="" {
				display
				ckfail `"`failreasons'"'
				}
			}	/* end main capture block */
		local rc = _rc
		if `rc' {
			/* exiting with an error while the data are preserved restores them */
			if "`preserve'"=="" {
				if "`failreason'"!="" {
					display as result "ckvar failed because `failreason'"
					}
				display as text "Data restored..."
				}
			exit `rc'
			}
		else {
			/* success: cancel the pending restore so the new variables are kept */
			if "`preserve'"=="" {
				restore, not
				}
			}
		}
end
			

program define _ckdupl, rclass
	/* _ckdupl: report observations duplicated on the values of varlist.

	   Sorts the data by varlist and counts every observation after the first
	   within each group of identical values.  When gen() names a variable and
	   duplicates exist, that variable is created holding a group number
	   (1, 2, ...) for the members of each duplicated group and 0 elsewhere.
	   No variable is created when there are no duplicates.

	   Returns r(dups), the number of duplicate observations. */
	syntax varlist [, gen(str)]

	/* position within the group minus one: non-zero marks an extra copy */
	tempvar extra
	quietly bysort `varlist': gen byte `extra' = _n-1
	quietly count if `extra'
	local ndups = r(N)

	if `ndups' == 0 {
		disp as text "There were no duplicates based on matching values of `varlist'."
		}
	else {
		disp as result  "`ndups' duplicate" plural(`ndups'," was","s were") " found based upon matching values of " as input "`varlist'" as result "."
		if "`gen'"!="" {
			/* flag the first member of each multi-observation group ... */
			quietly by `varlist':  gen long `gen' = (_n==1) & (_N>1)
			/* ... then a running sum of the flags numbers the groups */
			quietly replace `gen' = sum(`gen') if `gen' | `extra'
			quietly compress `gen'
			local stamp "$S_DATE at $S_TIME"
			label var `gen' "Duplicate group number generated on `stamp' based on key `varlist'"
			}
		}
	return scalar dups = `ndups'
end

program define ckfail, rclass
	/* ckfail: display the variables which could not be checked and why.

	   Takes one string of "variable:reason;" entries.  The string is split on
	   the characters : and ; so that the tokens come in groups of four:
	   variable, ":", reason, ";".  A two-column table of variable and reason
	   is displayed.

	   Returns r(failvars), the list of variable names found in the string. */
	syntax anything
	tokenize `anything', parse(":;")
	local cnt 1
	local maxlen = length("Variable name") + 1
	/* first pass: collect the variable names and find the widest one */
	while `"``cnt''"'!="" {
		local var "``cnt''"
		if `cnt'==1 {
			local vars "`var'"
			}
		else {
			local vars "`vars' `var'"
			}
		local maxlen = max(`maxlen',length("`var'"))
		local cnt = `cnt' + 4
		}
	local col2 = `maxlen' + 1
	/* cnt has moved past 1 only if at least one entry was found */
	if `cnt' > 1 {
		display as result "Fatal Errors"
		display as text "Variable name" _column(`col2') " {c |} Errors"
		display as text "{hline `col2'}{c +}{c -}{hline 53}"
		local cnt 1
		/* second pass: one row per entry; the reason sits two tokens after the name */
		/* (a stray quote used to follow the compound quote in this condition) */
		while `"``cnt''"'!="" {
			local reasoncnt = `cnt' + 2
			display as result "``cnt''" _column(`col2') as text " {c |} " as result "``reasoncnt''"
			local cnt = `cnt' + 4
			}
		}
	else {
		display as text "All OK"
		}
	return local failvars "`vars'"
end