*! version 1.1.0 - Mike Lacy 28sep2010
// Many helpful modifications from Richard Williams
// //// ***************************************************************
// Sept. 2010. Changed method of getting # of response categories, kcat
// Changed to modern comment style. Changed items in return list.
//
// April 2006: Changed display of results so as to scale them
// by 1/N. They are thus analogous to variances rather than sums of squares
// and match what is reported in the article.
//
// July 2007: Make sum of squares analogue available in return lists.
//
// No standard Stata help file is available yet.
// See Comments and "Usage" below.
// ***************************************************************
//
capture program drop r2o
program define r2o, rclass
version 7
// This program calculates the ordinal r2o measure and its adjusted
// version as described in:
// Lacy, Michael G. 2006. "An Explained Variation Measure for
// Ordinal Response Models with Comparisons to Other Ordinal R2
// Measures." Sociological Methods and Research 32,4.
//
// The program below presumes that a categorical response model (ologit
// mlogit oprobit gologit2) that can yield predicted probabiliites
// via -predict- has just been run and that its e() list is still
// intact.
//
// The "trustme" option can be used to force the program to run even if
// the user has not just executed one of the preceding response model programs.
// This option would be relevant for the user who has some other response model
// program that works with -predict-.
//
// Returns: The regular r2o and its bias adjusted version in
// r(r2o) and r(ur2o)
//
// Begin program
// Detect whether an appropriate categorical response estimation command has been run
syntax [, TRustme] [NOMarg]
tempname ok
scalar `ok' = 0
local clist = "ologit mlogit oprobit gologit2"
foreach c of local clist {
if !`ok' & e(cmd)== "`c'" {
scalar `ok' = 1
}
}
// User can force the program to work with his or her command
if "`trustme'"!="" {
scalar `ok' = 1
}
if !`ok' {
display as error "Preceding estimation command must be able to generate predicted probabilities."
display as error "Command -r2o- not executed"
exit
}
// Else, we've got a response model that gives predicted probs., so we can go ahead.
// First capture stuff from the response model just run.
// This will be used to yield the predicted Prob(Y), given the X vector
tempvar touse
mark `touse' if e(sample) // mark the cases used to yield the prediction
local depvar = "`e(depvar)'" // name of dependent variable
qui levelsof `depvar' if `touse'
local kcat = wordcount(r(levels)) //# response categories
tempname numcovar N
scalar `numcovar' = e(df_m) // number of covariates
scalar `N' = e(N) // Sample size from model just run
//
// Get the weighting information, if any
// Need to change pweights to aweights so tabulate & sum can handle them
if "`e(wtype)'"=="pweight"{
local wgt "[aweight`e(wexp)']"
}
else if "`e(wtype)'"!="" {
local wgt "[`e(wtype)'`e(wexp)']"
}
// Or, if you just want to kill support for weights altogether, uncomment these lines.
// if "`e(wtype)'"!=""{
// display as error "Weights are not supported"
// exit
// }
//
// Obtain total variation of the ordinal response variable from its marginal distribution.
// The marginal distribution comes from a simple tabulation, and then
// the ordinal variation measure is calculated on this distribution.
di " "
tempname F // to eventually hold a vector of predicted cumulative probabilities
if ("`nomarg'" == "") { // user wants to see the marginals
display as text "Marginal distribution for cases in the estimation sample."
tabulate `depvar' if `touse' `wgt', matcell(`F')
}
else {
qui tabulate `depvar' if `touse' `wgt', matcell(`F')
}
// Marginal frequencies are now in the vector F, ordered from 1 to r(r).
// Convert them to cumulative relative frequencies
//
matrix `F'[1,1] = `F'[1,1]/`N'
forvalues ir = 2/`r(r)' {
matrix `F'[`ir',1] = (`F'[`ir'-1, 1]) + `F'[`ir', 1]/`N'
}
//
// Now calculate the marginal variation, SY, by applying ordinal
// variation function, Sum (F_j(1-F_j), j = 1..k to the marginal
// cum F distribution.
// The sum actually needs only to go to `r(r)' -1 but since F_r(r) = 1,
// there is no harm in running up to r(r).
tempname SY
scalar `SY' = 0
forvalues ir = 1/`r(r)' {
scalar `SY' = `SY' + ( `F'[`ir',1] * ( 1.0 - `F'[`ir',1]))
}
scalar `SY' = `SY'
matrix drop `F' // avoid confusion later with another F
//
//
// Start on conditional variation SYX, i.e., the variation of Y given X .
// Create list of temporary variables, `F1'... to hold predicted
// probabilities for each case as created by the response model
// the user just executed.
local Flist = ""
forvalues i = 1/`kcat' {
tempvar F`i'
local Flist = "`Flist'`F`i'' " // `F1' `F2' ...
}
quietly predict `Flist' if `touse' // predicted prob for each case into F1, ...
// Cumulate the predicted probabilities for each case
tempvar prev
qui gen `prev' = `F1'
forvalues i = 2/`kcat' {
quietly replace `F`i'' = `prev' + `F`i''
quietly replace `prev' = `F`i''
}
// The sum actually needs only to go to `kcat'-1, but since F_kcat = 1,
// there is no harm in running up to kcat.
tempvar iSYX
gen `iSYX' = 0 // individual cases contribution to the variation of Y given X
forvalues i = 1/`kcat' {
quietly replace `iSYX' = `iSYX' + `F`i'' * (1.0 - `F`i'')
}
//
// Mean of iSYX across all cases is the SYX
sum `iSYX' if `touse' `wgt', meanonly
tempname SYX
scalar `SYX' = r(mean)
tempname r2o ur2o
scalar `r2o' = 1.0 - `SYX'/`SY' // the basic ordinal R^2 measure
return scalar r2o = `r2o'
return scalar vtot = `SY' // total variation
return scalar verr = `SYX' // error variation
return scalar vmodel = (`SY' - `SYX') // explained variation
//
//
// Now do the bias corrected version of r2o, calculated like
// the adjusted version of a conventional R^2. `numcovar' denotes the
// number of covariates used in the response model
scalar `ur2o' = 1.0 - (`SYX'/`SY') * ((`N'-1)/(`N'- `numcovar' -1))
return scalar ur2o = `ur2o'
//
display ""
display as text ///
"Total Model Error Lacy Bias Adj." _newline ///
"Variation Variation Variation r2o r2o" _newline ///
_dup(58) char(175) _newline ///
as result ///
%-12.6f `SY' %-12.6f (`SY' - `SYX') %-12.6f `SYX' %-12.5f `r2o' %-12.5f `ur2o'
//
end