// scoregof
// Score test of normality for probit and bivariate probit models.
// Postestimation command for -probit- and -biprobit-
// Richard C. Chiburis 11/1/2009
// Based on Murphy, "Score tests of normality in bivariate probit models," Economics Letters 2007.
// I corrected some typos in Murphy's paper.
//
// Usage:
// For asymptotic p-value:   scoregof
// For bootstrapped p-value: scoregof, bootstrap(#)
//
// Cannot be used if -probit-, -dprobit-, or -biprobit- was called with any of these options:
// partial, offset, constraints
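//
// Example (illustrative only; the dataset and regressors are arbitrary):
//   . sysuse auto, clear
//   . probit foreign mpg weight
//   . scoregof                    // asymptotic chi2(2) p-value
//   . scoregof, bootstrap(199)    // parametric-bootstrap p-value
//   . return list                 // saved results: r(scorestat), r(p)
// The same calls apply after -biprobit-; the asymptotic reference distribution
// is then chi2(9).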
program scoregof, rclass
  version 10.1
  syntax [, BOOTstrap(integer 0)]

  if missing(e(cmd)) {
    error 301
  }
  if e(cmd) != "probit" & e(cmd) != "dprobit" & e(cmd) != "biprobit"  {
    di as err "scoregof can only be called after probit, dprobit, or biprobit"
    exit 321
  }

  if e(cmd) == "probit" | e(cmd) == "dprobit" {
    tempname b numparams
    matrix `b' = e(b)
    scalar `numparams' = colsof(`b') + 2

    // Get the LHS variable
    local y = e(depvar)

    // Get the RHS variables
    local regressors : colnames e(b)
    local noconstant = ",noconstant"
    while 1 {
      gettoken regressori regressors : regressors
      if "`regressori'" == "" {
        continue, break
      }
      else if "`regressori'" == "_cons" {
        local noconstant
      }
      else {
        local x `x' `regressori'
      }
    }

    preserve
    quietly keep if e(sample)  // use only observations in the estimation sample

    // An ml model statement is set up here only so that mlvecsum and mlmatsum
    // will work below.  The maximization is not re-run; ml is simply
    // initialized with the parameters already estimated by -probit-.
    tempname b2 extraequation
    ml model d2 scoregof_null (`y':`y'=`x'`noconstant') /`extraequation'
    matrix coleq `b' = `y'
    matrix `b2' = 0
    matrix colnames `b2' = `extraequation':_cons
    matrix `b' = (`b', `b2')
    ml init `b'

    tempvar xb
    predict `xb', xb

    ********** Likelihood **********
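    // Sign trick: s = 2*y - 1 equals +1 or -1, so each observation's likelihood
    // contribution is fi = normal(s*xb) regardless of the outcome.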
    tempvar s fi
    tempname lnf
    qui gen byte `s' = 2 * `y' - 1
    qui gen double `fi' = normal(`s'*`xb')
    scalar `lnf' = 0  // placeholder: mlvecsum and mlmatsum require an lnf scalar as input but do not use its value here

    ********** Gradient **********
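    // g1i is the usual probit score with respect to the index xb.  g2i and g3i
    // are the scores for the two added normality parameters, evaluated at zero;
    // the factors (xb^2 - 1) and (xb^3 - 3*xb) are Hermite polynomials that pick
    // up skewness and kurtosis departures from normality.  mlvecsum maps g1i to
    // the index equation and g2i, g3i to the auxiliary equation.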
    tempvar g1i g2i g3i
    qui gen double `g1i' = `s' * normalden(`xb') / `fi'
    qui gen double `g2i' = (`xb'^2 - 1) * `g1i' / 6
    qui gen double `g3i' = -(`xb'^3 - 3*`xb') * `g1i' / 24

    tempname gj g
    forvalues j = 1/3 {
      local eqj = min(`j', 2)
      mlvecsum `lnf' `gj' = `g`j'i', eq(`eqj')
      matrix `g' = (nullmat(`g'), `gj')
    }

    ********** Information Matrix **********
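    // Expected (Fisher) information: for each observation, sum the score outer
    // product over both possible outcomes y in {0,1}, weighting by the outcome
    // probability fi; mlmatsum then sums across observations.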
    tempvar Hi
    tempname Hs Hj Hjk negH
    qui gen double `Hi' = .
    matrix `negH' = J(`numparams', `numparams', 0)
    forvalues yy = 0/1 {
      qui replace `s'   = 2 * `yy' - 1
      qui replace `fi'  = normal(`s'*`xb')
      qui replace `g1i' = `s' * normalden(`xb') / `fi'
      qui replace `g2i' = (`xb'^2 - 1) * `g1i' / 6
      qui replace `g3i' = -(`xb'^3 - 3*`xb') * `g1i' / 24

      forvalues j = 1/3 {
        local eqj = min(`j', 2)
        forvalues k = 1/3 {
          local eqk = min(`k', 2)
          qui replace `Hi' = `fi' * `g`j'i' * `g`k'i'
          mlmatsum `lnf' `Hjk' = `Hi', eq(`eqj',`eqk')
          matrix `Hj' = (nullmat(`Hj'), `Hjk')
        }
        matrix `Hs' = (nullmat(`Hs') \ `Hj')
        matrix drop `Hj'
      }
      matrix `negH' = `negH' + `Hs'
      matrix drop `Hs'
    }

    ********** Test Statistic **********
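    // Score (LM) statistic S = g * (-H)^(-1) * g', evaluated at the restricted
    // (probit) estimates.  Under the null of normality it is asymptotically
    // chi-squared with 2 degrees of freedom.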
    tempname M scorestat p
    matrix `M' = `g' * invsym(`negH') * `g''
    scalar `scorestat' = `M'[1,1]

    restore

    ********** p-value **********
    if `bootstrap' > 0 {
      scoregof_bootstrap1 `bootstrap' `scorestat'
    }

    di
    di as txt "Score goodness-of-fit test for probit"
    di

    if `bootstrap' > 0 {
      scalar `p' = r(p)
      di as txt "Score test statistic =" as res %8.2f `scorestat'
      di as txt "Bootstrapped p-value =  " as res %8.4f `p'
    }
    else {
      scalar `p' = chi2tail(2, `scorestat')
      di as txt _col(14) "chi2(2) =" as res %8.2f `scorestat'
      di as txt _col(10) "Prob > chi2 =  " as res %8.4f `p'
    }
    return scalar scorestat = `scorestat'
    return scalar p = `p'

  }
  else {
    tempname b numparams
    matrix `b' = e(b)
    scalar `numparams' = colsof(`b') + 9

    // Get the LHS variables
    local depvar = e(depvar)
    gettoken y1 depvar : depvar
    gettoken y2 depvar : depvar

    // Get the RHS variables
    local eqs : coleq e(b)
    local regressors : colnames e(b)
    local noconstant1 = ",noconstant"
    local noconstant2 = ",noconstant"
    while 1 {
      gettoken eqi eqs : eqs
      gettoken regressori regressors : regressors
      if "`eqi'" == "`y1'" {
        if "`regressori'" == "_cons" {
          local noconstant1
        }
        else {
          local x1 `x1' `regressori'
        }
      }
      else if "`eqi'" == "`y2'" {
        if "`regressori'" == "_cons" {
          local noconstant2
        }
        else {
          local x2 `x2' `regressori'
        }
      }
      else continue, break
    }

    preserve
    quietly keep if e(sample)  // use only observations in the estimation sample

    // An ml model statement is set up here only so that mlvecsum and mlmatsum
    // will work below.  The maximization is not re-run; ml is simply
    // initialized with the parameters already estimated by -biprobit-.
    ml model d2 bipr_lf (`y1':`y1'=`x1'`noconstant1') (`y2':`y2'=`x2'`noconstant2') /athrho
    ml init `b'

    tempvar xb1 xb2
    tempname rho sq1mrho
    predict `xb1', xb1
    predict `xb2', xb2
    scalar `rho' = e(rho)
    scalar `sq1mrho' = sqrt(1 - `rho'^2)

    ********** Likelihood **********
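    // Sign trick: with s1 = 2*y1 - 1 and s2 = 2*y2 - 1, every (y1,y2) outcome
    // has likelihood contribution fi = binormal(s1*xb1, s2*xb2, s1*s2*rho).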
    tempvar s1 s2 sxb1 sxb2 srho fi
    tempname lnf
    qui gen byte `s1' = 2 * `y1' - 1
    qui gen byte `s2' = 2 * `y2' - 1
    qui gen double `sxb1' = `s1'*`xb1'
    qui gen double `sxb2' = `s2'*`xb2'
    qui gen double `srho' = `s1'*`s2'*`rho'
    qui gen double `fi' = binormal(`sxb1', `sxb2', `srho')
    scalar `lnf' = 0  // placeholder: mlvecsum and mlmatsum require an lnf scalar as input but do not use its value here

    ********** Gradient **********
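    // g1i-g3i are the standard biprobit scores (for the xb1, xb2, and athrho
    // equations).  g4i-g12i are the scores for the nine added parameters of
    // Murphy's (2007) expansion (third- and fourth-order departures from
    // bivariate normality), evaluated at zero.  T1i, T2i, and T3i are
    // intermediate quantities shared across the score expressions.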
    tempvar T1i T2i T3i g1i g2i g3i g4i g5i g6i g7i g8i g9i g10i g11i g12i
    qui gen double `T1i'  = normalden(`sxb1') * normal((`sxb2'-`srho'*`sxb1') / `sq1mrho')
    qui gen double `T2i'  = normalden(`sxb2') * normal((`sxb1'-`srho'*`sxb2') / `sq1mrho')
    qui gen double `T3i'  = normalden(`sxb1') * normalden((`sxb2'-`srho'*`sxb1') / `sq1mrho') / `sq1mrho'
    qui gen double `g1i'  = `s1' * `T1i' / `fi'
    qui gen double `g2i'  = `s2' * `T2i' / `fi'
    qui gen double `g3i'  = `s1' * `s2' * `T3i' / `fi'
    qui gen double `g4i'  = `s1' * ((`sxb1'^2 - 1) * `T1i' - `srho' * ((`srho'^2-2)*`sxb1' + `srho'*`sxb2')*`T3i' / `sq1mrho'^2) / 6 / `fi'
    qui gen double `g5i'  = `s2' * (-`sxb1' + `srho'*`sxb2') * `T3i' / `sq1mrho'^2 / 2 / `fi'
    qui gen double `g6i'  = `s1' * (-`sxb2' + `srho'*`sxb1') * `T3i' / `sq1mrho'^2 / 2 / `fi'
    qui gen double `g7i'  = `s2' * ((`sxb2'^2 - 1) * `T2i' - `srho' * ((`srho'^2-2)*`sxb2' + `srho'*`sxb1')*`T3i' / `sq1mrho'^2) / 6 / `fi'
    qui gen double `g8i'  = (-`sxb1'*(`sxb1'^2 - 3)*`T1i' - `srho' * ((`srho'^4 - 3*`srho'^2 + 3)*`sxb1'^2 + (`srho'^3-3*`srho')*`sxb1'*`sxb2' + `srho'^2*`sxb2'^2 - (3-2*`srho'^2)*(1-`srho'^2))*`T3i' / `sq1mrho'^4) / 24 / `fi'
    qui gen double `g9i'  = `s1' * `s2' * (`sxb1'^2 - 2*`srho'*`sxb1'*`sxb2' + `srho'^2*`sxb2'^2 - (1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 6 / `fi'
    qui gen double `g10i' = -(`srho'*`sxb1'^2 - (1+`srho'^2)*`sxb1'*`sxb2' + `srho'*`sxb2'^2 - `srho'*(1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 4 / `fi'
    qui gen double `g11i' = `s1' * `s2' * (`sxb2'^2 - 2*`srho'*`sxb2'*`sxb1' + `srho'^2*`sxb1'^2 - (1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 6 / `fi'
    qui gen double `g12i' = (-`sxb2'*(`sxb2'^2 - 3)*`T2i' - `srho' * ((`srho'^4 - 3*`srho'^2 + 3)*`sxb2'^2 + (`srho'^3-3*`srho')*`sxb2'*`sxb1' + `srho'^2*`sxb1'^2 - (3-2*`srho'^2)*(1-`srho'^2))*`T3i' / `sq1mrho'^4) / 24 / `fi'

    tempname gj g
    forvalues j = 1/12 {
      local eqj = min(`j', 3)
      mlvecsum `lnf' `gj' = `g`j'i', eq(`eqj')
      matrix `g' = (nullmat(`g'), `gj')
    }

    ********** Information Matrix **********
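    // Expected information: sum the score outer product over all four outcome
    // combinations (y1,y2) in {0,1}x{0,1}, weighting by the outcome probability
    // fi; mlmatsum then sums across observations.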
    tempvar Hi
    tempname Hs Hj Hjk negH
    qui gen double `Hi' = .
    matrix `negH' = J(`numparams', `numparams', 0)
    forvalues yy1 = 0/1 {
      forvalues yy2 = 0/1 {
        qui replace `y1' = `yy1'
        qui replace `y2' = `yy2'
        drop `xb1' `xb2'
        predict `xb1', xb1  // recompute xb1, xb2 in case of endogeneity (one dependent variable appears among the other equation's regressors)
        predict `xb2', xb2
        qui replace `s1' = 2 * `y1' - 1
        qui replace `s2' = 2 * `y2' - 1
        qui replace `sxb1' = `s1'*`xb1'
        qui replace `sxb2' = `s2'*`xb2'
        qui replace `srho' = `s1'*`s2'*`rho'
        qui replace `fi' = binormal(`sxb1', `sxb2', `srho')
        qui replace `T1i'  = normalden(`sxb1') * normal((`sxb2'-`srho'*`sxb1') / `sq1mrho')
        qui replace `T2i'  = normalden(`sxb2') * normal((`sxb1'-`srho'*`sxb2') / `sq1mrho')
        qui replace `T3i'  = normalden(`sxb1') * normalden((`sxb2'-`srho'*`sxb1') / `sq1mrho') / `sq1mrho'
        qui replace `g1i'  = `s1' * `T1i' / `fi'
        qui replace `g2i'  = `s2' * `T2i' / `fi'
        qui replace `g3i'  = `s1' * `s2' * `T3i' / `fi'
        qui replace `g4i'  = `s1' * ((`sxb1'^2 - 1) * `T1i' - `srho' * ((`srho'^2-2)*`sxb1' + `srho'*`sxb2')*`T3i' / `sq1mrho'^2) / 6 / `fi'
        qui replace `g5i'  = `s2' * (-`sxb1' + `srho'*`sxb2') * `T3i' / `sq1mrho'^2 / 2 / `fi'
        qui replace `g6i'  = `s1' * (-`sxb2' + `srho'*`sxb1') * `T3i' / `sq1mrho'^2 / 2 / `fi'
        qui replace `g7i'  = `s2' * ((`sxb2'^2 - 1) * `T2i' - `srho' * ((`srho'^2-2)*`sxb2' + `srho'*`sxb1')*`T3i' / `sq1mrho'^2) / 6 / `fi'
        qui replace `g8i'  = (-`sxb1'*(`sxb1'^2 - 3)*`T1i' - `srho' * ((`srho'^4 - 3*`srho'^2 + 3)*`sxb1'^2 + (`srho'^3-3*`srho')*`sxb1'*`sxb2' + `srho'^2*`sxb2'^2 - (3-2*`srho'^2)*(1-`srho'^2))*`T3i' / `sq1mrho'^4) / 24 / `fi'
        qui replace `g9i'  = `s1' * `s2' * (`sxb1'^2 - 2*`srho'*`sxb1'*`sxb2' + `srho'^2*`sxb2'^2 - (1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 6 / `fi'
        qui replace `g10i' = -(`srho'*`sxb1'^2 - (1+`srho'^2)*`sxb1'*`sxb2' + `srho'*`sxb2'^2 - `srho'*(1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 4 / `fi'
        qui replace `g11i' = `s1' * `s2' * (`sxb2'^2 - 2*`srho'*`sxb2'*`sxb1' + `srho'^2*`sxb1'^2 - (1-`srho'^2)) * `T3i' / `sq1mrho'^4 / 6 / `fi'
        qui replace `g12i' = (-`sxb2'*(`sxb2'^2 - 3)*`T2i' - `srho' * ((`srho'^4 - 3*`srho'^2 + 3)*`sxb2'^2 + (`srho'^3-3*`srho')*`sxb2'*`sxb1' + `srho'^2*`sxb1'^2 - (3-2*`srho'^2)*(1-`srho'^2))*`T3i' / `sq1mrho'^4) / 24 / `fi'
        forvalues j = 1/12 {
          local eqj = min(`j', 3)
          forvalues k = 1/12 {
            local eqk = min(`k', 3)
            qui replace `Hi' = `fi' * `g`j'i' * `g`k'i'
            mlmatsum `lnf' `Hjk' = `Hi', eq(`eqj',`eqk')
            matrix `Hj' = (nullmat(`Hj'), `Hjk')
          }
          matrix `Hs' = (nullmat(`Hs') \ `Hj')
          matrix drop `Hj'
        }
        matrix `negH' = `negH' + `Hs'
        matrix drop `Hs'
      }
    }

    ********** Test Statistic **********
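    // Score (LM) statistic; under the null of bivariate normality it is
    // asymptotically chi-squared with 9 degrees of freedom.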
    tempname M scorestat p
    matrix `M' = `g' * invsym(`negH') * `g''
    scalar `scorestat' = `M'[1,1]

    restore

    ********** p-value **********
    if `bootstrap' > 0 {
      scoregof_bootstrap2 `bootstrap' `scorestat'
    }

    di
    di as txt "Murphy's score test for biprobit"
    di

    if `bootstrap' > 0 {
      scalar `p' = r(p)
      di as txt "Score test statistic =" as res %8.2f `scorestat'
      di as txt "Bootstrapped p-value =  " as res %8.4f `p'
    }
    else {
      scalar `p' = chi2tail(9, `scorestat')
      di as txt _col(14) "chi2(9) =" as res %8.2f `scorestat'
      di as txt _col(10) "Prob > chi2 =  " as res %8.4f `p'
    }
    return scalar scorestat = `scorestat'
    return scalar p = `p'
  }
end

program scoregof_bootstrap1, rclass
  args reps scorestat
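  // Parametric bootstrap: repeatedly draw y from the fitted probit
  // probabilities, re-estimate the model, and recompute the score statistic.
  // The p-value is (number of replications exceeding the observed statistic
  // + 0.5) / (reps + 1), so it is never exactly 0 or 1.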

  quietly {
    local cmdline `"`e(cmdline)'"'  // macro copy, not =exp, to avoid the 244-character string-expression limit

    // Get the LHS variable
    local y = e(depvar)

    tempname holdname
    _estimates hold `holdname', copy restore 
    preserve

    tempvar p1
    predict `p1', pr

    tempname higher
    scalar `higher' = 0
    forvalues boot = 1/`reps' {
      replace `y' = (uniform() < `p1')
      `cmdline'
      scoregof
      if r(scorestat) > `scorestat' {
        scalar `higher' = `higher' + 1
      }
    }

    return scalar p = (`higher' + 0.5) / (`reps' + 1)
  }   
end

program scoregof_bootstrap2, rclass
  args reps scorestat
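  // Parametric bootstrap for biprobit: draw u ~ U(0,1) and assign (y1,y2)
  // according to the cumulative cell probabilities in the order
  // (0,0), (0,1), (1,0), (1,1), re-estimate the model, and recompute the
  // score statistic.  The p-value is computed as in scoregof_bootstrap1.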

  quietly {
    local cmdline `"`e(cmdline)'"'  // macro copy, not =exp, to avoid the 244-character string-expression limit

    // Get the LHS variables
    local depvar = e(depvar)
    gettoken y1 depvar : depvar
    gettoken y2 depvar : depvar

    tempname holdname
    _estimates hold `holdname', copy restore 
    preserve

    tempvar p00 p01 p10 p11 u
    forvalues yy1 = 0/1 {
      forvalues yy2 = 0/1 {
        replace `y1' = `yy1'
        replace `y2' = `yy2'
        predict `p`yy1'`yy2'', p`yy1'`yy2'
      }
    }
    gen double `u' = .

    tempname higher
    scalar `higher' = 0
    forvalues boot = 1/`reps' {
      replace `u' = uniform()
      replace `y1' = (`u' >= `p00' + `p01')
      replace `y2' = ((`u' >= `p00' & `u' < `p00' + `p01') | `u' >= `p00' + `p01' + `p10')
      `cmdline'
      scoregof
      if r(scorestat) > `scorestat' {
        scalar `higher' = `higher' + 1
      }
    }

    return scalar p = (`higher' + 0.5) / (`reps' + 1)
  }   
end