*! version 1.1.0 - Mike Lacy 28sep2010
// Many helpful modifications from Richard Williams
// //// ***************************************************************
//  Sept. 2010. Changed method of getting # of response categories, kcat
//  Changed to modern comment style. Changed items in return list.
//               
//  April 2006:  Changed display of results so as to scale them
//  by 1/N. They are thus analogous to variances rather than sums of squares
//  and match what is reported in the article.
//
// July 2007: Make sum of squares analogue available in return lists.
//
// No standard Stata help file is available yet.
// See Comments and "Usage" below.
// ***************************************************************
//
capture program drop  r2o
program define r2o, rclass
   version 7
// This program calculates the ordinal r2o measure and its adjusted
// version as described in:
// Lacy, Michael G. 2006.  "An Explained Variation Measure for
// Ordinal Response Models with Comparisons to Other Ordinal R2
// Measures." Sociological Methods and Research 32,4.
//
// The program below presumes that a categorical response model (ologit
// mlogit oprobit gologit2) that can yield predicted probabiliites
// via -predict- has just been run and that its e() list is still
// intact.
//
// The "trustme" option can be used to force the program to run even if
// the user has not just executed one of the preceding response model programs.
// This option would be relevant for the user who has some other response model
// program that works with -predict-.
//
// Returns: The regular r2o and its bias adjusted version in
// r(r2o) and r(ur2o)
//
// Begin program
// Detect whether an appropriate categorical response estimation command has been run
   syntax [, TRustme] [NOMarg]
   tempname ok
   scalar `ok' = 0
   local clist = "ologit mlogit oprobit gologit2"
   foreach c of local clist {
      if !`ok' & e(cmd)== "`c'" {
      scalar `ok' = 1
      }
   }
// User can force the program to work with his or her command
   if "`trustme'"!="" {
      scalar `ok' = 1
   }
   if  !`ok' {
      display as error "Preceding estimation command must be able to generate predicted probabilities."
      display as error "Command -r2o- not executed"
      exit
   }
// Else, we've got a response model that gives  predicted probs., so we can go ahead.
// First capture stuff from the response model just run.
// This will be used to yield the predicted Prob(Y), given the X vector
   tempvar touse
	mark `touse' if e(sample)        // mark the cases used to yield the prediction 
   local depvar = "`e(depvar)'"     // name of dependent variable 
   qui levelsof `depvar' if `touse'
   local kcat = wordcount(r(levels)) //# response categories 
   tempname numcovar N
   scalar `numcovar' = e(df_m)      // number of covariates 
   scalar `N' = e(N)                // Sample size from  model just run 
//
// Get the weighting information, if any
// Need to change pweights to aweights so tabulate & sum can handle them
   if "`e(wtype)'"=="pweight"{
      local wgt "[aweight`e(wexp)']"
   }
   else if "`e(wtype)'"!="" {
      local wgt "[`e(wtype)'`e(wexp)']"
   }

// Or, if you just want to kill support for weights altogether, uncomment these lines.
// if "`e(wtype)'"!=""{
//     display as error "Weights are not supported"
//     exit
// }

//
// Obtain total variation of the ordinal response variable from its marginal distribution.
// The marginal distribution comes from a simple tabulation, and then
// the ordinal variation measure is calculated on this distribution.
   di " "
   tempname F  // to eventually hold a vector of predicted cumulative probabilities
   if ("`nomarg'" == "") {  // user wants to see the marginals
      display as text "Marginal distribution for cases in the estimation sample."
      tabulate `depvar' if `touse' `wgt', matcell(`F')
   }
   else {
      qui tabulate `depvar' if `touse' `wgt', matcell(`F')
   }
// Marginal frequencies are now in the vector F, ordered from 1 to r(r).
// Convert them to cumulative relative frequencies
//
   matrix `F'[1,1] = `F'[1,1]/`N'
   forvalues ir = 2/`r(r)' {
   matrix `F'[`ir',1] = (`F'[`ir'-1, 1]) + `F'[`ir', 1]/`N'
   }
//
// Now calculate the marginal variation, SY, by applying ordinal
// variation function, Sum (F_j(1-F_j), j = 1..k to the marginal
// cum F distribution.
// The sum actually needs only to go to `r(r)' -1 but since F_r(r) = 1,
// there is no harm in running up to r(r).
   tempname SY
   scalar `SY' = 0
   forvalues ir = 1/`r(r)' {
      scalar `SY' = `SY' + ( `F'[`ir',1] * ( 1.0 - `F'[`ir',1]))
    }
   scalar `SY' = `SY'
   matrix drop `F'   // avoid confusion later with another F 
//
//
// Start on conditional variation SYX, i.e., the variation of Y given X .
// Create list of temporary variables, `F1'... to hold predicted
// probabilities for each case as created by the response model
// the user just executed.
   local Flist = ""
   forvalues i = 1/`kcat' {
      tempvar F`i'
      local Flist = "`Flist'`F`i'' "     // `F1' `F2' ... 
   }
   quietly predict `Flist' if `touse'    // predicted prob for each case into F1, ... 
// Cumulate the predicted probabilities for each case
   tempvar prev
   qui gen `prev' = `F1'
   forvalues i = 2/`kcat' {
      quietly replace `F`i'' = `prev'  + `F`i''
      quietly replace `prev' = `F`i''
   }
// The sum actually needs only to go to `kcat'-1, but since F_kcat = 1,
// there is no harm in running up to kcat.
   tempvar iSYX
   gen `iSYX' = 0      // individual cases contribution to the variation of Y given X
   forvalues i = 1/`kcat' {
      quietly replace `iSYX' = `iSYX' + `F`i'' * (1.0 - `F`i'')
   }
//
// Mean of iSYX across all cases is the SYX
   sum `iSYX' if `touse' `wgt', meanonly
   tempname SYX
   scalar `SYX' = r(mean)
   tempname r2o ur2o
   scalar `r2o' = 1.0 - `SYX'/`SY'       // the basic ordinal R^2 measure 
   return scalar r2o = `r2o'
   return scalar vtot = `SY'                // total variation 
	return scalar verr = `SYX'               // error variation 
	return scalar vmodel = (`SY' - `SYX')   // explained  variation 
	
//
//
// Now do the bias corrected version of r2o, calculated like
// the adjusted version of a conventional R^2.  `numcovar' denotes the
// number of covariates used in the response model
   scalar `ur2o' = 1.0 - (`SYX'/`SY') * ((`N'-1)/(`N'- `numcovar' -1))
   return scalar ur2o = `ur2o'
//
   display  ""
   display as text ///
   "Total       Model       Error       Lacy        Bias Adj."  _newline ///
   "Variation   Variation   Variation   r2o         r2o"       _newline ///
   _dup(58) char(175) _newline ///
   as result ///
   %-12.6f `SY' %-12.6f (`SY' - `SYX') %-12.6f `SYX' %-12.5f `r2o'  %-12.5f `ur2o'
//
end