*! 13mar2024
/*******************************************************************************
*                                                                              *
*                    Mata library for -crossvalidate- package                  *
*                                                                              *
*******************************************************************************/

**# Utilities
// Starts mata interpreter
mata:

// A function to retrieve an if/in expression from an estimation command.  The 
// value is returned in a Stata local macro named `ifin' and will be missing if 
// no if or in conditions were included in the command passed to the function.
string scalar getifin(string scalar x) {
	
	// Used to store the result from testing the match for the regular expression
	real scalar matched
	
	// Contains the if/in expression from the command that will be modified
	string scalar strexp
	
	// Tests if there is an if/in expression in the estimation command
	matched = ustrregexm(x, " i[fn]{1}\s+.*?(?=, *[a-zA-Z]|\$)")
	
	// If there is an expression in the estimation command
	if (matched) {
		
		// Stores the expression in strexp
		strexp = ustrregexs(0)
		
	// If there isn't a match	
	} else {
		
		// Stores an empty string
		strexp = ""
		
	} // End ELSE block from testing for presence of if/in expression
	
	// Stores the if/in expression in the local macro ifin
	st_local("ifin", strtrim(strexp))
	
	// Returns the matched string
	return(strexp)
	
} // End of Mata function definition to retrieve if/in expressions from command

// Defines a function to parse and return the command string up to the comma 
// that starts specification of options.  This is needed for cases where no if 
// or in statements are included in the estimation command to insert the 
// appropriate if statement to fit the model to the training data only.
string scalar getnoifin(string scalar x) {
	
	// Used to store the result from testing the match for the regular expression
	real scalar matched
	
	// Contains the if/in expression from the command that will be modified
	string scalar strexp
	
	// Tests the regular expression that will capture everything up to options
	matched = ustrregexm(x, "^(.*?)(?![^()]*\)),")
	
	// If there is a comma not enclosed in parentheses
	if (matched) {
		
		// Return the syntax up to the comma that starts the options
		strexp = ustrregexs(1)

	// If this doesn't result in a match
	} else {
		
		// We'll assume no options are specified and return the string 
		strexp = x
		
	} // End ELSE Block for cmd string w/o options
	
	// Returns the string upto
	st_local("noifin", strexp)
	
	// Returns the matched string
	return(strexp)
	
} // End of function definition to return the cmd string up to the options

// Defines a function to indicate whether or not the command string includes
// optional arguments
real scalar hasoptions(string scalar x) {
	
	return(ustrregexm(x, `"(?:(?!["\(]).)*?,\s*(?![^\(]*\))(.*)\$"'))
	
} // End of function definition to return an indicator for options in cmd

// Defines a function to parse the prefix command into it's constituent parts
void function cvparse(string scalar cv) {
	
	// Declares a transmorphic scalar for the tokenizer 
	transmorphic scalar t
	
	// Declares a string rowvector with valid option names
	string rowvector optnms
	
	// Declares a string matrix to store the parsed tokens
	string matrix opts
	
	// Declares the counter to identify token boundaries and the index for 
	// inserting of parsed tokens into the opts matrix, and an iterator used to 
	// loop over the rows of the matrix containing the parsed tokens
	real scalar cnt, optcnt, i
	
	// Declares a string scalar used to build the token and a string scalar that 
	// stores the token
	string scalar opt, token, fname
	
	// Initialize the tokenizer
	t = tokeninit("", ("(", " ", ")"), (""), 1)
	
	// Define the valid option names
	optnms = ("metric", "monitors", "uid", "tpoint", "retain", "kfold", 	 ///   
			 "state", "results", "grid", "params", "tuner", "seed", 		 ///   
			 "classes", "threshold", "pstub", "split", "display", "obs", 	 ///   
			 "modifin", "kfifin", "noall", "pmethod", "replay", "name", 	 ///   
			 "fitnm", "validnm", "popts")
	
	// Initialize a null matrix to store the valid results
	opts = J(26, 1, "")
	
	// Initialize the counter used to identify token boundaries based on 
	// parentheses
	cnt = 0
	
	// Initialize the row index scalar used to insert the parsed tokens into the 
	// matrix with the results
	optcnt = 1

	// Initialize the string scalar used to construct the parsed token from its 
	// constituent pieces
	opt = ""
	
	// Pass the string into the tokenizer
	tokenset(t, cv)
	
	// Loop over each of the tokens parsed by the tokenizer until the end
	while((token = tokenget(t)) != "") {
		
		// Check the next token to determine if it is an opening parenthesis
		// If so, increment the cnt variable to indicate that more tokens need 
		// to be combined in order to parse the option and its arguments and 
		// keep the value of cnt the same if it is not an opening parenthesis
		cnt = (tokenpeek(t) == "(" ? cnt + 1 : cnt)
		
		// Check the current token to determine if it is a closing parenthesis.
		// If it is decrement cnt and if not do not change cnt
		cnt = (token == ")" ? cnt - 1 : cnt) 
		
		// If the value of cnt is positive add this token to the opt variable
		if (cnt > 0) opt = opt + token
		
		// If the value is 0 
		else {
			
			// Insert the existing option string and it's closing parenthesis 
			// into the matrix with the options
			opts[optcnt, 1] = opt + token
			
			// Reset the opt variable so we can parse the next option
			opt = ""
			
			// If the nulled out string and the current token only contains a 
			// space increment the optcnt variable to insert the next opt into 
			// the next row of the matrix
			if (!ustrregexm(opt + token, "^\s\$")) optcnt = optcnt + 1
			
		} // End ELSE Block to insert parsed option into the matrix of results
		
	} // End of WHILE loop over the tokens parsed from cv	
	
	// Loop over the result matrix to verify valid options
	for(i = 1; i < optcnt; i++) {
		
		// Get the name of the option from the stored result
		getname(opts[i, 1])
		
		// Gets the full name of the function
		fname = fullnm(st_local("fnm"))
		
		// If the full name is contained in the rowvector above, return the 
		// parsed option to a local macro using the full name (even if it is an 
		// abbreviated option name)
		if (anyof(optnms, fname)) st_local(fname, opts[i, 1])
		
	} // End Loop to verify and return the valid options
	
} // End of function definition to parse prefix options for crossvalidate package

// Defines a function to retrieve the argument(s) passed to a parameter with an 
// option to specify the name of the returned macro
void function getarg(string scalar param, | string scalar rname) {
	
	// String scalar to store the argument(s)
	string scalar retval, retnm
	
	// Determines whether to use the default return name or a user supplied 
	// return name
	retnm = (rname == "" ? "argval" : rname)
	
	// Removes everything up to the opening parentheses with the regex, then 
	// removes the closing parenthesis with subinstr
	retval = ustrregexrf(param, "[a-zA-Z0-9_]+\(", "")
	
	// If the string ends with a closing parenthesis remove it to prevent 
	// unbalanced parentheses
	if (substr(retval, -1, 1) == ")") { 
		
		// Remove the trailing parenthesis
		retval = substr(retval, 1, strrpos(retval, ")") - 1)
		
	} // End IF Block to remove trailing parenthesis	
	
	// If the parameter doesn't include any parentheses return an empty string
	if (ustrregexm(param, "[\(\)]") == 0) st_local(retnm, "")
	
	// Returns the argument value in a local macro
	else st_local(retnm, retval)
	
} // End of function definition to get argument value from a parameter

// Defines a function to retrieve the metric name to support passing optional 
// arguments to metrics/monitors
void function getname(string scalar fname, | string scalar rname) {
	
	// Declares string scalar to store the return name
	string scalar retnm
	
	// Test if a return name was passed
	retnm = (rname == "" ? "fnm" : rname)

	// If a valid function name is used, this should return the function name
	if (ustrregexm(fname, "\s*([a-zA-Z0-9_]+).*")) st_local(retnm, ustrregexs(1))
	
	// Otherwise, return a blank string
	else st_local(retnm, "")
	
} // End of function definition to get function name for monitors/metrics

// Defines a struct object to store a richer representation of the data used for
// classification metrics
struct Crosstab {
	
	// The confusion matrix and reversed confusion matrix (for 2x2 cases only)
	real matrix conf, revconf
	
	// The row margins (sums of predicted categories) for the normal and 
	// reversed confusion matrix (for 2x2 cases only)
	real colvector rowm, revrowm
	
	// The diagonal from the confusion matrix (correctly classified cells)
	real colvector correct
	
	// The vector storing each of the values in the matrix in ascending order
	real colvector values
	
	// The column margins (sums of observed categories) for the normal and 
	// reversed confusion matrix (for 2x2 cases only)
	real rowvector colm, revcolm
	
	// The total number of observations
	real scalar n
	
	// The total number of true positive (correctly classified) cases
	real scalar tp
	
	// The number of categories in the confusion matrix
	real scalar levs
	
} // End of Struct definition returned by xtab

// Defines a function to compute cross tabulations and return the cross tab
// as a Mata matrix
struct Crosstab scalar xtab(string scalar pred, string scalar obs, 			 ///   
							string scalar touse) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Allocates column vectors to store the unique values; the predicted and 
	// observed values of the dependent variable; the indices for the relevant
	// rows in the dataset; and the row margins
	real colvector vals, yhat, y, idx, rowm
	
	// Allocates a row vector to store the column margins
	real rowvector colm
	
	// Defines the matrix that will store the confusion matrix
	real matrix conf
	
	// Defines scalars that store the number of unique levels of the variable of 
	// interest, and two iterators for the rows and columns of the matrix
	real scalar levs, i, j
	
	// Gets the predicted values of the variable of interest
	yhat = st_data(., pred, touse)
	
	// Gets the observed values of the variable of interest
	y = st_data(., obs, touse)
	
	// Gets the unique values across both sets of values ordered from lowest to 
	// highest values
	vals = uniqrows(yhat \ y)
	
	// Stores the unique values in the struct element values
	c.values = vals
	
	// Gets the number of unique values
	levs = rows(vals)	
	
	// Stores the number of unique values in the struct element levs
	c.levs = levs
	
	// Creates a square matrix with missing values 
	conf = J(levs, levs, .)
	
	// Creates column vector with missing values
	rowm = J(levs, 1, .)
	
	// Creates row vector with missing values
	colm = J(1, levs, .)
	
	// Loop over the values of the predicted variable
	for(i = 1; i <= levs; i++) {
		
		// Gets the indices from the predicted variable that have the first 
		// value from the set of all values
		idx = selectindex(yhat :== vals[i, 1])
		
		// Gets the total number of cases predicted to be in the ith class
		rowm[i, 1] = rows(idx)
		
		// Gets the total number of cases observed in the ith class
		colm[1, i] = rows(selectindex(y :== vals[i, 1]))
		
		// Loop over the values of the observed variable
		for(j = 1; j <= levs; j++) {
			
			// count the number of cases with the ith predicted value that have 
			// the jth observed value
			conf[i, j] = rows(selectindex(y[idx, 1] :== vals[j, 1]))
			
		} // End Loop over the observed variable values	
		
	} // End Loop over the rows of the confusion matrix
	
	// Stores the column margins in the struct element colm
	c.colm = colm
	
	// Stores the row margins in the struct element rowm
	c.rowm = rowm
	
	// Stores the confusion matrix in the struct element conf
	c.conf = conf
	
	// Test if this is a 2x2 matrix
	if (levs == 2) {
		
		// reverse codes the confusion matrix for binary metric options to use
		// (e.g., if the user specifies they want to use a different 
		// "event_level" in the R functions' terminology it will use this)
		c.revconf = (conf[2, 2], conf[2, 1] \ conf[1, 2], conf[1, 1])
		
		// Computes the column margins from the reverse coded confusion matrix
		c.revcolm = colsum(c.revconf)
		
		// Computes the row margins from the reverse coded confusion matrix
		c.revrowm = rowsum(c.revconf)
		
	} // End IF Block for binary case
	
	// For any multinomial/ordinal case with > 2 distinct levels
	else {
		
		// Return a null matrix for the reverse coded confusion matrix
		c.revconf = J(0, 0, .)
		
		// Return a null vector for the reverse coded column margins
		c.revcolm = J(1, 0, .)
		
		// Return a null vector for the reverse coded row margins
		c.revrowm = J(0, 1, .)
		
	} // End ELSE Block to populate struct elements with null matrices/vectors
	
	// Stores the total number of observations in the struct element n
	c.n = sum(conf)
	
	// Stores count of correctly predicted classes in the struct element correct 
	c.correct = diagonal(conf)
	
	// Stores the total correctly classified cases in the struct element tp
	c.tp = sum(diagonal(conf))
	
	// Returns the struct with all of this information precomputed
	return(c)
	
} // End definition of cross-tabulation function

// Defines a function to test the nesting of values in a dataset
real scalar isnested(string scalar varnms, string scalar touse) {
	
	// Declares a matrix to store the data that we need to check nesting on
	real matrix df
	
	// Declares a scalar for the number of columns in the matrix and to identify 
	// the column with the variable that is most deeply nested
	real scalar vars
	
	// Gets the unique combinations of the data that should be nested
	df = uniqrows(st_data(., varnms, touse))
	
	// Gets the number of columns in the matrix with the data
	vars = cols(df)
	
	// Returns a value of 1 if the data are nested and a value of 0 otherwise
	return(rows(df) == rows(uniqrows(df[., vars])) ? 1 : 0)
	
} // End definition of function to check the nesting of variables

// Defines a function to retrieve the distribution date from the file passed to 
// the function.
string scalar distdate(string scalar fname) {
	
	// Declares a scalar to store the file handle
	real scalar fh
	
	// Declares a scalar to store the contents of a single line of the file
	string scalar line
	
	// Opens a connection to the file passed as a parameter
	fh = fopen(fname, "r")
	
	// Loops over the lines of the file from start to end
	while ((line = fget(fh)) != J(0, 0, "")) {
		
		// Tests if this is the line that has the distro date
		if (ustrregexm(line, "^\*!\s([0-9]{1,2}[a-z]{3}[0-9]{4})")) {
			
			// Closes the connection to the file
			fclose(fh)

			// Returns the distrodate
			return(ustrregexs(1))
		
		} // End IF Block to find the star bang with the distrodate
		
	} // End of loop over the source code file
	
} // End of function definition to retrieve distribution date	

// Create a function to implement the Poisson density function
real colvector dpois(real colvector events, real colvector means, | 		 ///   
					 real scalar ln){
	
	// Declares a column vector to store the densities temporarily
	real colvector density
	
	// Compute the density 
	density = means :^ events :* exp(-means) :/ factorial(events)
	
	// Test if there is an argument passed to ln and it equals 1
	if (args() == 3 & ln == 1) return(log(density))
	
	// Otherwise return the density without transformation
	else return(density)

} // End definition for the Poisson density function

// Defines function to make weighting matrix for Kappa
real matrix kappawgts(real scalar levs, real scalar pow) {
	
	// Declares a real matrix that will be returned
	real matrix wgts
	
	// This is equivalent to:
	// w <- rlang::seq2(0L, n_levels - 1L)
	// w <- matrix(w, nrow = n_levels, ncol = n_levels)
	wgts = J(1, levs, (0::levs - 1))
	
	// This generates the weight matrix
	return(abs(wgts - wgts'):^pow)
	
} // End of function definition for MC Kappa weighting matrix

// Defines a function to convert abbreviated option names to their full names
string scalar fullnm(string scalar key) {
	
	// Declares an associative array
	class AssociativeArray scalar nm
	
	// Reinitialize the associative array
	nm.reinit("string", 1)
	
	// populate the associative array with the key value pairs
	nm.put("u", "uid")
	nm.put("tp", "tpoint")
	nm.put("kf", "kfold")
	nm.put("spl", "split")
	nm.put("res", "results")
	nm.put("noall", "noall")
	nm.put("dis", "display")
	nm.put("na", "name")
	nm.put("fit", "fitnm")
	nm.put("ps", "pstub")
	nm.put("c", "classes")
	nm.put("thr", "threshold")
	nm.put("mod", "modifin")
	nm.put("kfi", "kfifin")
	nm.put("pm", "pmethod")
	nm.put("po", "popts")
	nm.put("me", "metric")
	nm.put("o", "obs")
	nm.put("mo", "monitors")
	nm.put("val", "valnm")
	nm.put("ret", "retain")
	nm.put("state", "state")
	nm.put("g", "grid")
	nm.put("par", "params")
	nm.put("tun", "tuner")
	nm.put("seed", "seed")
	nm.put("rep", "replay")
	nm.put("loo", "loo")
	
	// Sets the not found default to be the value passed to the function
	nm.notfound(key)
	
	// Returns the value based on the key passed to this function
	return(nm.get(key))
	
} // End of function definition to expand available option names

// End Mata interpreter
end

**# Binary Metrics
/*******************************************************************************
*                                                                              *
*                        Binary Classification Metrics                         *
*                                                                              *
*******************************************************************************/

// Start mata
mata: 

// Can define each of the common metrics/monitors for binary prediction, based 
// on passing the arguments for the confusion matrix.  There will be additional 
// computational overhead this way, but we could also consider coding around 
// this so we would return the confusion matrix to a Mata object and then do the 
// subsequent computations on the single confusion matrix.

// For sensitivity, specificity, prevalence, ppv, and npv see:
// https://yardstick.tidymodels.org/reference/ppv.html	
// For others in this section see other pages from above			
real scalar sens(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Creates the confusion matrix
	c = xtab(pred, obs, touse)
	
	// For now, at least, we'll restrict these metrics to only the binary case
	// so this assertion will make sure that we have a binary confusion matrix
	// assert(rows(c.conf) == 2 & cols(c.conf) == 2)
	
	// Computes the metric from the confusion matrix
	result = (args() == 4 & opts == 1 ?										 ///   
			  c.revconf[2, 2] / colsum(c.revconf[, 2]) :					 ///   
			  c.conf[2, 2] / colsum(c.conf[, 2]))
	
	// Returns the metric as a scalar
	return(result)
	
} // End of function definition for sensitivity
					
// Function to compute precision from a confusion matrix.  See:
// https://yardstick.tidymodels.org/reference/precision.html for the formula
real scalar prec(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Creates the confusion matrix
	c = xtab(pred, obs, touse)
	
	// For now, at least, we'll restrict these metrics to only the binary case
	// so this assertion will make sure that we have a binary confusion matrix
	// assert(rows(c.conf) == 2 & cols(c.conf) == 2)
	
	// Computes the metric from the confusion matrix
	result = (args() == 4 & opts == 1 ?										 ///   
			  c.revconf[2, 2] / c.revrowm[2, ] :	c.conf[2, 2] / c.rowm[2, ])
			  
	// Returns the metric
	return(result)
	
} // End of function definition for precision

// Function to compute recall, which appears to be a synonym for sensitivity.  
// See https://yardstick.tidymodels.org/reference/precision.html for formula					  
real scalar recall(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {
		
	// Declares a scalar to store the result
	real scalar result
	
	// Recall is a synonym for sensitivity, so it just calls that function
	result = sens(pred, obs, touse, opts)
	
	// Returns the metric
	return(result)
	
} // End of function definition for recall
				
// Defines function to compute specificity from a confusion matrix.  
// See: https://yardstick.tidymodels.org/reference/ppv.html for the formula
real scalar spec(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Creates the confusion matrix
	c = xtab(pred, obs, touse)
	
	// For now, at least, we'll restrict these metrics to only the binary case
	// so this assertion will make sure that we have a binary confusion matrix
	// assert(rows(c.conf) == 2 & cols(c.conf) == 2)
	
	// Computes the metric from the confusion matrix
	result = (args() == 4 & opts == 1 ?										 ///   
			  c.revconf[1, 1] / c.revcolm[, 1] : c.conf[1, 1] / c.colm[, 1])
	
	// Returns the metric
	return(result)
	
} // End of function definition for specificity

// Defines a function to compute prevalence.  
// See: https://yardstick.tidymodels.org/reference/ppv.html for the formula
real scalar prev(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Creates the confusion matrix
	c = xtab(pred, obs, touse)
	
	// For now, at least, we'll restrict these metrics to only the binary case
	// so this assertion will make sure that we have a binary confusion matrix
	// assert(rows(c.conf) == 2 & cols(c.conf) == 2)
	
	// Computes the metric from the confusion matrix
	result = (args() == 4 & opts == 1 ?										 ///   
			  c.revcolm[, 2] / c.n : c.colm[, 2] / c.n)

	// Returns the metric
	return(result)
	
} // End of function definition for prevalence

// Defines a function to compute positive predictive value.
// See: https://yardstick.tidymodels.org/reference/ppv.html for the formula
real scalar ppv(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Declares a scalar to store the resulting metric, sensitivity, 
	// specificity, and prevalence (used to compute the metric)
	real scalar result, sens, spec, prev
	
	// Computes sensitivity 
	sens = sens(pred, obs, touse, opts)
	
	// Computes prevalence
	prev = prev(pred, obs, touse, opts)
	
	// Computes specificity
	spec = spec(pred, obs, touse, opts)
	
	// Computes positive predictive value
	result = (sens * prev) / ((sens * prev) + ((1 - spec) * (1 - prev)))
	
	// Returns the metric
	return(result)
	
} // End of function definition for positive predictive value

// Defines a function to compute negative predictive value.
// See: https://yardstick.tidymodels.org/reference/ppv.html for the formula
real scalar npv(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Declares a scalar to store the resulting metric, sensitivity, 
	// specificity, and prevalence (used to compute the metric)
	real scalar result, sens, spec, prev
	
	// Computes sensitivity 
	sens = sens(pred, obs, touse, opts)
	
	// Computes prevalence
	prev = prev(pred, obs, touse, opts)
	
	// Computes specificity
	spec = spec(pred, obs, touse, opts)
	
	// Computes negative predictive value
	result = (spec * (1 - prev)) / (((1 - sens) * prev) + (spec * (1 - prev)))
	
	// Returns the metric
	return(result)
	
} // End of function definition for negative predictive value

// Defines a function to compute accuracy.
real scalar acc(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares a scalar to store the resulting metric
	// real scalar result
	
	// Creates the confusion matrix
	c = xtab(pred, obs, touse)
	
	// Computes the metric from the confusion matrix
	// result = c.tp / c.n

	// Returns the metric
	return(c.tp / c.n)
	
} // End of function definition for accuracy

// Defines a function to compute "balanced" accuracy.  
// See https://yardstick.tidymodels.org/reference/bal_accuracy.html for more info
real scalar bacc(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Declares a scalar to store the resulting metric, sensitivity, 
	// specificity, and prevalence (used to compute the metric)
	real scalar result, sens, spec
	
	// Computes sensitivity 
	sens = sens(pred, obs, touse, opts)
	
	// Computes specificity
	spec = spec(pred, obs, touse, opts)
	
	// Computes "balanced" accuracy as the average of sensitivity and specificity
	result = (sens + spec) / 2
	
	// Returns the metric
	return(result)
	
} // End of function definition for balanced accuracy

// Defines function to compute the F1 statistic
// Based on second equation here: https://www.v7labs.com/blog/f1-score-guide
//# Bookmark #1
real scalar f1(string scalar pred, string scalar obs, string scalar touse,   ///   
					| transmorphic matrix opts) {
	
	// Declares a scalar to store the resulting metric, precision, and recall
	real scalar result, prec, rec, beta
	
	// Determine what value of beta to use
	beta = (args() == 4 & eltype(opts) == "real" & cols(opts) == 2 ? opts[1, 2] : 1)

	// Computes precision
	prec = (args() == 4 & eltype(opts) == "real" ?							 ///   
			prec(pred, obs, touse, opts[1, 1]) : prec(pred, obs, touse))

	// Computes recall
	rec = (args() == 4 & eltype(opts) == "real" ?							 ///   
			recall(pred, obs, touse, opts[1, 1]) : recall(pred, obs, touse))
	
	// Computes the f1 score 
	result = ((1 + beta^2) * prec * rec) / ((beta^2 * prec) + rec)
	
	// Returns the metric
	return(result)
	
} // End of function definition for f1score

// Defines J-index (Youden's J statistic)
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-j_index.R
real scalar jindex(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {

	// Return the micro averaged detection prevalence
	return(sens(pred, obs, touse, opts) + spec(pred, obs, touse, opts) - 1)

} // End of function definition for j-index

// Defines a binary R^2 (tetrachoric correlation)
real scalar binr2(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Declares a scalar to store the result
	real scalar result
					
	// Call the tetrachoric command in Stata
	stata("cap: qui: tetrachoric " + pred + " " + obs + " if " + touse + ", ed")

	// Stores the result in a variable
	result = st_numscalar("r(rho)")
	
	// If the result is missing, return a missing value
	if (hasmissing(result)) return(J(1, 1, .))
	
	// otherwise return the correlation coefficient
	else return(result)

} // End of function definition for binary R^2 

// Defines Matthews correlation coefficient
// based on: https://en.wikipedia.org/wiki/Phi_coefficient#Multiclass_case
real scalar mcc(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
		
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// To store the row margins
	real colvector rowm
	
	// To store the column margins
	real rowvector colm
	
	// Scalars used in the computation
	real scalar i, num, den1, den2
	
	// Stores confusion matrix
	c = xtab(pred, obs, touse)
	
	// If the confusion matrix isn't square return a missing value.
	if (rows(c.conf) != cols(c.conf)) return(.)
	
	// Stores the row margins
	rowm = c.rowm
	
	// Stores the column margins
	colm = c.colm
	
	// Stores the first term for the numerator (correct classified * n)
	num = c.tp * c.n
		
	// Initializes the denominator terms
	den1 = c.n^2
	den2 = den1
	
	// Loop over the dimension for the margins
	for(i = 1; i <= rows(rowm); i++) {
		
		// Starts subtracting the true * predicted cell sizes
		num = num - colm[1, i] * rowm[i, 1]
		
		// Subtracts the squared number of predicted cases for each value 
		// from the squared sample size
		den1 = den1 - rowm[i, 1]^2
		
		// Subtracts the squared number of observed cases for each value from 
		// the squared sample size
		den2 = den2 - colm[1, i]^2
		
	} // End Loop over the margins
	
	// Return the correlation coefficient
	return(num / (sqrt(den1) * sqrt(den2)))
	
} // End of function definition for Matthew's correlation coefficent

// End mata
end

**# Multinomial Metrics
/*******************************************************************************
*                                                                              *
*                     Multinomial Classification Metrics                       *
*                                                                              *
*******************************************************************************/

// Start mata
mata:

// Defines multiclass specificity
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-spec.R
real scalar mcspec(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares column vectors to store the total sample size in a J(x, 1, n) 
	// sized column vector, , true positive counts,
	// true positive + false positive counts, true positive + false negative 
	// counts, true negative counts, false positives, and the numerator and 
	// denominator for the metric
	real colvector n, tp, tpfp, tpfn, tn, fp, num, den
	
	// Get the confusion matrix
	c = xtab(pred, obs, touse)
	
	// Store the total sample size in a column vector with the sample size in 
	// each element
	n = J(rows(c.conf), 1, c.n)
	
	// Get the vector of true positives
	tp = c.correct
	
	// Get the true positive + false positive counts
	tpfp = c.rowm
	
	// Get the true positive + false negative counts and transpose the result
	tpfn = c.colm'
	
	// Get the count of true negatives
	tn = n - (tpfp + tpfn - tp)
	
	// Get the count of false positives
	fp = tpfp - tp
	
	// Return the micro average specificity
	return(sum(tn) / sum((tn + fp)))

} // End of function definition for multiclass specificity

// Defines multiclass sensitivity
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-sens.R
real scalar mcsens(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Get the confusion matrix
	c = xtab(pred, obs, touse)

	// Return the micro averaged sensitivity
	return(c.tp / sum(c.colm))

} // End of function definition for multiclass sensitivity

// Defines multiclass recall
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-recall.R
real scalar mcrecall(string scalar pred, string scalar obs, 				 ///   
					 string scalar touse, | transmorphic matrix opts) {
	
	// Return the micro averaged recall
	return(mcsens(pred, obs, touse))

} // End of function definition for multiclass recall

// Defines multiclass precision
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-precision.R
real scalar mcprec(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Get the confusion matrix
	c = xtab(pred, obs, touse)

	// Return the micro averaged precision
	return(c.tp / sum(c.rowm))

} // End of function definition for multiclass precision

// Defines multiclass positive predictive value
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-ppv.R
real scalar mcppv(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Return the micro averaged PPV
	// Lines 176-178 indicate that multiclass PPV should be equal to precision 
	// in all cases EXCEPT when the prevalence paramter in that function is 
	// passed an argument.  With our method signature, there isn't a way to 
	// pass that parameter.
	return(mcprec(pred, obs, touse))

} // End of function definition for multiclass positive predictive value

// Defines multiclass negative predictive value
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-npv.R
real scalar mcnpv(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Declares column vectors to store the total sample size in a J(x, 1, n) 
	// sized column vector and true positive + false negative counts
	real colvector n, tpfn

	// Declares scalars to store intermediate results
	real scalar prev, sens, spec, num, den
	
	// Get the confusion matrix
	c = xtab(pred, obs, touse)
	
	// Store the total sample size in a column vector with the sample size in 
	// each element
	n = J(c.levs, 1, c.n)
	
	// Get the true positive + false negative counts and transpose the result
	tpfn = c.colm'
	
	// Compute prevalence
	prev = sum(tpfn) / sum(n)
	
	// Compute multiclass sensitivity
	sens = mcsens(pred, obs, touse)
	
	// Compute multiclass specificity
	spec = mcspec(pred, obs, touse)
	
	// Define the numerator for the metric
	num = spec * (1 - prev)

	// Define the denominator for the metric
	den = (1 - sens) * prev + spec * (1 - prev)
	
	// Return the micro averaged NPV
	return(num / den)

} // End of function definition for multiclass negative predictive value

// Defines multiclass F1 statistic
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-f_meas.R
real scalar mcf1(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Declares scalars to store intermediate results
	real scalar prec, sens, beta
	
	// Determine whether to use the default value for beta
	beta = (args() == 4 & eltype(opts) == "real" ? opts[1, 1] : 1)
	
	// Compute precision
	prec = mcprec(pred, obs, touse)
	
	// Compute multiclass sensitivity
	sens = mcsens(pred, obs, touse)
	
	// For the default case return the micro averaged F1 Statistic
	return(((1 + beta^2 ) * prec * sens) / ((beta^2 * prec) + sens))

} // End of function definition for multiclass negative predictive value

// Defines multiclass Detection Prevalence
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-detection_prevalence.R
real scalar mcdetect(string scalar pred, string scalar obs, 				 ///   
					 string scalar touse, | transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c
	
	// Compute the confusion matrix
	c = xtab(pred, obs, touse)
	
	// Return the micro averaged detection prevalence
	return(sum(c.rowm) / sum(J(c.levs, 1, c.n)))

} // End of function definition for multiclass detection prevalence

// Defines multiclass J-index (Youden's J statistic)
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-j_index.R
real scalar mcjindex(string scalar pred, string scalar obs, 				 ///   
					 string scalar touse, | transmorphic matrix opts) {

	// Return the micro averaged detection prevalence
	return(mcsens(pred, obs, touse) + mcspec(pred, obs, touse) - 1)

} // End of function definition for multiclass j-index

// Defines multiclass accuracy
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-accuracy.R
real scalar mcacc(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Return the accuracy
	return(acc(pred, obs, touse))
	
} // End of multiclass accuracy synonym

// Defines multiclass Balanced Accuracy
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-bal_accuracy.R
real scalar mcbacc(string scalar pred, string scalar obs, 					 ///   
				   string scalar touse, | transmorphic matrix opts) {
	
	// Declares scalars to store the intermediate results
	real scalar rec, sens
		
	// Compute the recall
	rec = mcrecall(pred, obs, touse)
	
	// Compute the sensitivity
	sens = mcsens(pred, obs, touse)
	
	// Return the balanced accuracy
	return((rec + sens) / 2)
	
} // End of function definition for multiclass balanced accuracy

// Defines multiclass Kappa
// similar to accuracy, but normalized by accuracy expected by random chance
// based on: https://github.com/tidymodels/yardstick/blob/main/R/class-kap.R
real scalar mckappa(string scalar pred, string scalar obs, 					 ///   
					string scalar touse, | transmorphic matrix opts) {
	
	// Creates the struct that gets returned
	struct Crosstab scalar c

	// Declares matrix to store random chance outcome
	real matrix wgts, conf, expected
	
	// Declares scalars to store the intermediate results
	real scalar i
	
	// Get the confusion matrix
	c = xtab(pred, obs, touse)
	
	// Normalizes the outer product of row margins * col margins
	expected = (c.rowm * c.colm) :/ c.n
	
	// Test whether or not the user is specifying a weighting option
	if (args() == 4 & eltype(opts) == "real") {
		
		// Generate the weighting matrix based on the power specified in the 
		// optional argument
		wgts = kappawgts(c.levs, opts[1, 1])
		
	} // End IF Block for user specified weighting option

	// If the user doesn't specify anything use the default weighting matrix
	else {
		
		// Generates the weighting matrix for the no-weighting case
		wgts = J(c.levs, c.levs, 1)
		
		// Will need to replace the diagonal with 0s for wgts
		for(i = 1; i <= rows(wgts); i++) wgts[i, i] = 0
				
	} // End ELSE Block for default weight matrix
	
	// Return the metric 
	return(1 - sum(wgts * c.conf) / sum(wgts * expected))
	
} // End of function definition for multiclass Kappa 

// Defines multiclass Mathews correlation coefficient
// based on: https://github.com/tidymodels/yardstick/blob/main/src/mcc-multiclass.c
real scalar mcmcc(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Call the other implementation
	return(mcc(pred, obs, touse))
	
} // End of function definition for multiclass MCC

// Defines a multiclass R^2 (polychoric correlation)
real scalar mcordr2(string scalar pred, string scalar obs, 					 ///   
					string scalar touse, | transmorphic matrix opts) {

	// Call the tetrachoric command in Stata
	stata("cap: qui: polychoric " + pred + " " + obs + " if " + touse)

	// Returns the correlation coefficient
	return(st_numscalar("r(rho)"))

} // End of function definition for ordinal R^2

// End mata
end

**# Continuous Metrics
/*******************************************************************************
*                                                                              *
*                          Continuous Metrics/Utilities                        *
*                                                                              *
*******************************************************************************/

// Start mata
mata:

// Defines function to compute mean squared error from predicted and observed 
// outcomes
real scalar mse(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Column vector to store the squared difference of pred - obs
	real colvector sqdiff
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Computes squared differences
	sqdiff = (st_data(., obs, touse) - st_data(., pred, touse)) :^2
	
	// Computes the average of the squared differences
	result = sum(sqdiff) / rows(sqdiff)
	
	// Returns the mean squared error
	return(result)

} // End of function definition for MSE

// Defines function to compute mean absolute error from predicted and observed 
// outcomes
real scalar mae(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Column vector to store the absolute difference of pred - obs
	real colvector absdiff
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Computes absolute differences
	absdiff = abs(st_data(., obs, touse) - st_data(., pred, touse))
	
	// Computes the average of the squared differences
	result = sum(absdiff) / rows(absdiff)
	
	// Returns the mean absolute error
	return(result)

} // End of function definition for MAE

// Metric based on definition here:
// https://developer.nvidia.com/blog/a-comprehensive-overview-of-regression-evaluation-metrics/
real scalar bias(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Returns the sum of residuals
	return(sum(st_data(., obs, touse) - st_data(., pred, touse)))
	
} // End of function definition for bias

// Metric based on definition here:
// https://developer.nvidia.com/blog/a-comprehensive-overview-of-regression-evaluation-metrics/
real scalar mbe(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Returns the sum of residuals
	return(sum(st_data(., obs, touse) - st_data(., pred, touse)) /			 ///   
		   rows(st_data(., obs, touse)))
	 
} // End of function definition for mean bias error

// Metric based on definition here:
// https://github.com/tidymodels/yardstick/blob/main/R/num-rsq.R
real scalar r2(string scalar pred, string scalar obs, string scalar touse,   ///   
					| transmorphic matrix opts) {
	
	// Returns the correlation between the predicted and observed variable
	return(corr(variance((st_data(., pred, touse), ///   
						  st_data(., obs, touse))))[2, 1])
	
} // End of function definition for R^2

// Creates function for root mean squared error
real scalar rmse(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Returns the square root of the mean squared error
	return(sqrt(mse(pred, obs, touse)))

} // End of function definition for RMSE

// Metric based on definition of mean absolute percentage error here:
// https://developer.nvidia.com/blog/a-comprehensive-overview-of-regression-evaluation-metrics/
real scalar mape(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Allocates a column vector to store the differences
	real colvector diff
	
	// Computes the residuals
	diff = st_data(., obs, touse) - st_data(., pred, touse)
	
	// Returns the sum of the absolute value of the residual divided by observed 
	// value, which is then divided by the number of observations
	return(sum(abs(diff :/ st_data(., obs, touse))) / rows(st_data(., obs, touse)))
	
} // End of function definition for mean absolute percentage error


// Metric based on definition of symmetric mean absolute percentage error here:
// https://github.com/tidymodels/yardstick/blob/main/R/num-smape.R
real scalar smape(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Allocates a column vector to store the differences
	real colvector num, denom
	
	// Creates a column vector with the observed outcome data
	num = abs(st_data(., obs, touse) - st_data(., pred, touse))
	
	// Computes the residuals
	denom = (abs(st_data(., obs, touse)) + abs(st_data(., pred, touse))) :/ 2
	
	// Returns the sum of the absolute value of the residual divided by the 
	// average of the sum of absolute observed and predicted values, divided by 
	// the number of observations
	return(sum(num :/ denom) / rows(denom))
	
} // End of function definition for mean absolute percentage error

// Defines function to compute mean squared log error from predicted and observed 
// outcomes.  Based on definition here:
// https://developer.nvidia.com/blog/a-comprehensive-overview-of-regression-evaluation-metrics/
real scalar msle(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Column vector to store the squared difference of pred - obs
	real colvector sqdiff
	
	// Declares a scalar to store the resulting metric
	real scalar result
	
	// Computes squared differences
	sqdiff = (log(st_data(., obs, touse) :+ 1) - log(st_data(., pred, touse)  :+ 1)) :^2
	
	// Computes the average of the squared differences
	result = sum(sqdiff) / rows(sqdiff)
	
	// Returns the mean squared error
	return(result)

} // End of function definition for mean squared log error

// Defines function to compute the root mean squared log error.  Based on 
// definition here:
// https://developer.nvidia.com/blog/a-comprehensive-overview-of-regression-evaluation-metrics/
real scalar rmsle(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Returns the square root of the mean squared log error
	return(sqrt(msle(pred, obs, touse)))
	
} // End of function definition for root mean squared log error

// Defines function for the ratio of performance to deviation
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-rpd.R
real scalar rpd(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Returns the ratio of the SD of predicted to the RMSE
	return(sqrt(variance(st_data(., pred, touse))) / rmse(pred, obs, touse))

} // End of function definition for RPD

// Defines function for the Index of ideality of correlation
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-iic.R
real scalar iic(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Declares some columnvectors
	real colvector neg, pos, delta
	
	// Declares scalars needed
	real scalar muneg, mupos, adj

	// Gets the difference in observed vs predict values
	delta = st_data(., obs, touse) - st_data(., pred, touse)
	
	// Selects only the differences that are negative
	neg = select(delta, delta :< 0)
	
	// Selects only the differences that are positive
	pos = select(delta, delta :>= 0)
	
	// Computes the absolute mean of the negative values
	muneg = sum(abs(neg)) / rows(neg)
	
	// Computes the absolute mean of the positive values
	mupos = sum(abs(pos)) / rows(pos)
	
	// Computes the adjustment factor for the correlation
	adj = min((muneg, mupos)) / max((muneg, mupos))
	
	// Returns the adjusted correlation
	return(r2(pred, obs, touse) * adj)

} // End of function definition for IIC

// Defines function for the Concordance Correlation Coefficient
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-ccc.R
real scalar ccc(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Declares scalars needed
	real scalar mupred, muobs, varpred, varobs, cov, n
	
	// Get the number of rows of the data
	n = rows(st_data(., obs, touse))

	// estimate_mean in R function
	mupred = mean(st_data(., pred, touse))

	// truth_mean in R function
	muobs = mean(st_data(., obs, touse)) 

	// estimate_variance in R function
	varpred = variance(st_data(., pred, touse))[1, 1] 

	// truth_variance in R function
	varobs = variance(st_data(., obs, touse))[1, 1] 

	// Gets the covariance between the predicted and observed values
	cov = variance((st_data(., pred, touse), st_data(., obs, touse)))[2, 1] 

	// Check for option to use the bias term
	if (args() == 4 & opts == 1) {
		
		// Adjust the values using the bias term
		return((2 * (cov * (n - 1) / n)) /									 ///   
		((varobs * (n - 1) / n) + (varpred * (n - 1) / n) + (muobs - mupred)^2))
		
	} // End ELSE Block for the use of the bias term
	
	// Computes and returns the coefficient w/o the bias term
	else return((2 * cov) / (varobs + varpred + (muobs - mupred)^2))
	
} // End of function definition for CCC

// Defines function for Pseudo-Huber Loss
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-pseudo_huber_loss.R
real scalar phl(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Declares a column vector to store the errors
	real colvector a
	
	// Declares a real scalar for delta
	real scalar delta
	
	// Test the number of arguments passed
	delta = (args() == 4 & eltype(opts) == "real" ? opts[1, 1] : 1)
	
	// Gets the difference between the observed and predicted values
	a = st_data(., obs, touse) - st_data(., pred, touse)

	// Computes and returns the loss function value
	return(mean(delta^2 :* (sqrt(1 :+ (a :/ delta) :^2) :- 1)))
	
} // End of function definition for Pseud-Huber Loss

// Defines function for Huber Loss
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-huber_loss.R
real scalar huber(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Declares a column vector to store the errors and absolute errors
	real colvector a, absa 
	
	// Declares a scalar for delta that would be passed in opts
	real scalar delta
	
	// Assign delta
	delta = (args() == 4 & eltype(opts) == "real" ? opts[1, 1] : 1)
	
	// Gets the difference between the observed and predicted values
	a = st_data(., obs, touse) - st_data(., pred, touse)
	
	// Gets the absolute difference of the observed and predicted values
	absa = abs(a)
	 
	// Computes and returns the loss function value
	return(mean(0.5 :* a[selectindex(absa :<= delta)]:^ 2 \						 ///   
				delta :* absa[selectindex(absa :> delta)] :- 0.5 * delta))
	
} // End of function definition for the Huber Loss function

// Defines function for Poisson Log Loss
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-poisson_log_loss.R
real scalar pll(string scalar pred, string scalar obs, string scalar touse,  ///   
					| transmorphic matrix opts) {
	
	// Returns the mean of the negative log poisson density
	return(mean(-dpois(st_data(., obs, touse), st_data(., pred, touse), 1)))
	
} // End of function definition for Poisson Log Loss

// Defines function for ratio of performance to interquartile range
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-rpiq.R
real scalar rpiq(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {

	// Declares scalar to store the iqr
	real scalar iqr
	
	// quietly calls the summarize command on the observed values
	stata("qui: su " + obs + " if " + touse + " == 1, de")
	
	// Get the interquartile range
	iqr = st_numscalar("r(p75)") - st_numscalar("r(p25)")

	// Returns the ratio of performance to interquartile range
	return(iqr / rmse(pred, obs, touse))
	
} // End of function definition for Poisson Log Loss

// Defines function for "Traditional" R^2
// based on: https://github.com/tidymodels/yardstick/blob/main/R/num-rsq_trad.R
real scalar r2ss(string scalar pred, string scalar obs, string scalar touse, ///   
					| transmorphic matrix opts) {
	
	// Declares a scalar to store the mean of the observed values
	real scalar mu
	
	// Mean of the observed values
	mu = mean(st_data(., obs, touse))

	// Returns 1 - (Residual SS / Total SS)
	return(1 - (sum((st_data(., obs, touse) - st_data(., pred, touse)):^2) / ///   
				sum((st_data(., obs, touse) :- mu):^2)))

} // End of function definition for "Traditional" R^2



// End mata interpreter
end