*! version 1.1.1 04June2017 Mauricio Caceres Bravo, caceres@nber.org
*! Mata files for alogit.ado

version 13.1

///////////////////////////////////////////////////////////////////////
//                       Main alogit function                        //
///////////////////////////////////////////////////////////////////////

// capture mata: mata drop alogit()
// capture mata: mata drop _alogit_exact()
// capture mata: mata drop _alogit_exact_fast()
// capture mata: mata drop _alogit_exact_loop()
//
// capture mata: mata drop alogit_logisticden()
// capture mata: mata drop alogit_combos()
// capture mata: mata drop _alogit_binadd_reverse()

// dlabels = `dlabels'
// aloglik = &`mata_loglik'()
// predict = &`mata_predict'()
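//
// Illustrative call from alogit.ado (a sketch; the equation labels are
// placeholders and the actual invocation lives in the ado file):
//     mata: M = alogit(("PA_lab1", "PA_lab2"), &_alogit_exact(), &alogit_predict())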

mata

// Main alogit function to perform the optimization
transmorphic function alogit(string rowvector dlabels,
                             pointer(function) aloglik,
                             pointer(function) predict)
{
    transmorphic M

    // Stata options
    string scalar model, method, algorithm, defgood, consider
    real scalar asexp, getc, getdef, reps, pll, fast

    // Data from Stata
    real matrix x, xz, info
    real colvector y, defcons
    real rowvector b

    // Auxiliary variables for optimization
    real colvector phi0, Pc0, nj_i, nc_i
    real matrix C, U0, C0, PC0

    // moptimize options
    real scalar difficult,
                technique,
                iterate,
                tolerance,
                ltolerance,
                nrtolerance

    string scalar exp,
                  nolog,
                  gradient,
                  hessian,
                  showstep,
                  showtolerance,
                  coefdiffs,
                  trace,
                  nonrtolerance,
                  norescale

    // Auxiliary variables for this function
    real scalar loglik, i, nj
    string scalar nstr, jstr
    real rowvector panel

    // Parse options from Stata
    // ------------------------

    exp       = st_local("exp")             // Exponential version
    model     = st_local("model")           // Model (dsc or alogit)
    method    = st_local("method")          // Method (exact, importance)
    algorithm = st_local("algorithm")       // Algorithm (matrix, loop, fast, faster)
    reps      = strtoreal(st_local("reps")) // Repetitions for importance
    defgood   = st_local("default")         // Default good
    consider  = st_local("consider")        // Consider goods

    // whether to compute the likelihood for exp(b)
    asexp = ( exp == "exp" )

    // Whether to process a default good or consideration goods
    // getcons = (consider != "") & (defgood == "")
    getdef = (defgood != "") & (consider == "")

    // Print the likelihood; don't optimize
    pll  = (st_local("loglik") == "loglik")

    // Will only generate C for model(alogit) and alg(matrix)
    getc = ((model == "alogit") & (algorithm == "matrix"))

    // If the computations are done in C, skip (*predict) since that
    // will be called directly from alogit.ado
    fast = ((algorithm == "fast") | (algorithm == "faster"))

    // Initialize optimization programs
    // --------------------------------

    M = moptimize_init()

    // Turn off intercepts (will add constant in .ado files as applicable)
    moptimize_init_eq_cons(M, 1, "off")
    moptimize_init_eq_cons(M, 2, "off")

    // Mark sample to use; grab grouping variable
    moptimize_init_touse(M, st_local("touse"))
    moptimize_init_by(M, st_local("group"))

    // Set up choice sets by grouping variable
    info  = panelsetup(*moptimize_util_by(M), 1)
    panel = panelstats(info)
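    // panelstats() returns (# panels, # obs, min group size, max group size)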
    if (panel[3] == panel[4]) {
        nstr = strtrim(sprintf("%21.0gc", panel[1]))
        jstr = strtrim(sprintf("%21.0gc", panel[3]))
        printf("Balanced panel. n = " + nstr + ", j = " + jstr + "\n")
    }
    else {
        nstr = strtrim(sprintf("%21.0gc", panel[1]))
        jstr = strtrim(sprintf("%21.0gc", panel[3]))
        jstr = jstr + " to " + strtrim(sprintf("%21.0gc", panel[4]))
        printf("Unbalanced panel. n = " + nstr + ", j = " + jstr + "\n")
    }

    // Set up optimization variables
    // -----------------------------

    if (fast) {
        moptimize_init_userinfo(M, 1, algorithm)
        moptimize_init_userinfo(M, 8, asexp)
    }
    else {
        // Grab the data
        x  = st_data(., st_local("xvars"),  st_local("touse"))
        xz = st_data(., st_local("xzvars"), st_local("touse"))
        // xb  = x  * st_matrix("b0")'
        // xzd = xz * st_matrix("d0")'

        // Default good
        defcons = st_data(., defgood + consider,  st_local("touse"))
        moptimize_init_userinfo(M, 9, defcons)

        if (method == "exact") {
            // Aux for computing choice sets
            nj   = panel[4]
            nj_i = info[, 2] :- info[, 1] :+ 1
            nc_i = getc? (2:^nj_i :- 1): .
            C    = getc? alogit_combos(nj): .

            // Pass user variables to compute the exact likelihood
            moptimize_init_userinfo(M, 1, C)
            moptimize_init_userinfo(M, 2, nc_i)
            moptimize_init_userinfo(M, 3, nj_i)
            moptimize_init_userinfo(M, 4, getdef)
            moptimize_init_userinfo(M, 5, x)
            moptimize_init_userinfo(M, 6, xz)
            moptimize_init_userinfo(M, 7, info)
            moptimize_init_userinfo(M, 8, asexp)
        }
        else if (method == "importance") {
            // Get a random sample of choice sets
            phi0 = invlogit(xz * st_matrix("d0")')
            U0   = runiform(rows(xz), reps)
            C0   = phi0 :> U0
            Pc0  = C0 :* log(phi0) :+ (1 :- C0) :* log(1 :- phi0)

            // Get the probabilities of the choice sets sample
            PC0  = J(rows(info), reps, .)
            for (i = 1; i <= rows(info); i++) {
                PC0[i, ] = exp(colsum(panelsubmatrix(Pc0, i, info)))
            }
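            // C0 holds one consideration-set draw per observation and
            // repetition, sampled from the attention model at the starting
            // values d0; PC0[i, r] is the probability of draw r for group i
            // under those starting values, used later as the
            // importance-sampling denominator.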

            // Pass user variables to compute the importance likelihood
            moptimize_init_userinfo(M, 1, C0)
            moptimize_init_userinfo(M, 2, PC0)
            moptimize_init_userinfo(M, 3, reps)
            moptimize_init_userinfo(M, 4, U0)
            moptimize_init_userinfo(M, 5, x)
            moptimize_init_userinfo(M, 6, xz)
            moptimize_init_userinfo(M, 7, info)
            moptimize_init_userinfo(M, 8, 0)
        }
    }

    // Set up optimization
    // -------------------

    // Set the evaluator to compute the importance likelihood
    moptimize_init_evaluator(M, aloglik)
    moptimize_init_evaluatortype(M, st_local("eval"))

    // Set dependent variable, independent vars
    moptimize_init_depvar(M, 1, st_local("depvar"))
    moptimize_init_eq_indepvars(M, 1, st_local("xvars"))
    moptimize_init_eq_indepvars(M, 2, st_local("xzvars"))

    // Label the things
    moptimize_init_eq_name(M, 1, "PY")
    moptimize_init_eq_name(M, 2, "PA")
    moptimize_init_eq_colnames(M, 2, dlabels)

    // Set up the starting parameters
    moptimize_init_eq_coefs(M, 1, asexp? exp(st_matrix("b0")): st_matrix("b0"))
    moptimize_init_eq_coefs(M, 2, asexp? exp(st_matrix("d0")): st_matrix("d0"))

    // Make the display pretty
    moptimize_init_valueid(M, "log likelihood")

    // Pass user options to moptimize()
    // --------------------------------

    difficult   = st_numscalar("difficult")
    technique   = st_local("technique")
    iterate     = st_numscalar("iterate")
    tolerance   = strtoreal(st_local("tolerance"))
    ltolerance  = strtoreal(st_local("ltolerance"))
    nrtolerance = strtoreal(st_local("nrtolerance"))

    if (difficult)        moptimize_init_singularHmethod(M, "hybrid")
    if (technique != "")  moptimize_init_technique(M, technique)
    if (iterate >= 0)     moptimize_init_conv_maxiter(M, iterate)
    if (tolerance   > 0)  moptimize_init_conv_ptol(M , tolerance)
    if (ltolerance  > 0)  moptimize_init_conv_vtol(M , ltolerance)
    if (nrtolerance > 0)  moptimize_init_conv_nrtol(M, nrtolerance)

    nolog         = (st_local("nolog")         == "")? "on":  "off"
    gradient      = (st_local("gradient")      == "")? "off": "on"
    hessian       = (st_local("hessian")       == "")? "off": "on"
    showstep      = (st_local("showstep")      == "")? "off": "on"
    showtolerance = (st_local("showtolerance") == "")? "off": "on"
    coefdiffs     = (st_local("coefdiffs")     == "")? "off": "on"
    trace         = (st_local("trace")         == "")? "off": "on"
    nonrtolerance = (st_local("nonrtolerance") == "")? "off": "on"
    norescale     = (st_local("norescale")     == "")? "on":  "off"

    moptimize_init_trace_value(M,      nolog)
    moptimize_init_trace_gradient(M,   gradient)
    moptimize_init_trace_Hessian(M,    hessian)
    moptimize_init_trace_step(M,       showstep)
    moptimize_init_trace_tol(M,        showtolerance)
    moptimize_init_trace_coefdiffs(M,  coefdiffs)
    moptimize_init_trace_coefs(M,      trace)
    moptimize_init_conv_ignorenrtol(M, nonrtolerance)
    moptimize_init_search_rescale(M ,  norescale)
    moptimize_init_nmsimplexdeltas(M, 1)

    // Run the routine and put the results in Stata
    // --------------------------------------------

    if (pll) {
        // Print likelihood at default/user-provided parameters
        b = asexp? exp((st_matrix("b0"), st_matrix("d0"))): (st_matrix("b0"), st_matrix("d0"))
        y = moptimize_init_depvar(M, 1)
        loglik = (*predict)(M, y, x, xz, defcons, method, "ll", b)
        printf("log-likelihood = " + strtrim(sprintf("%21.4f\n", loglik)))
        st_numscalar("ll", loglik)
    }
    else {
        // Run the optimization
        printf("\n")
        moptimize(M)
        if (!fast) {

            // Get average P(empty set)
            y = moptimize_init_depvar(M, 1)
            st_numscalar("mp0", (*predict)(M, y, x, xz, defcons, method, "mp0"))

            // Get average P(attention)
            if (model == "dsc") {
                st_numscalar("mp1", (*predict)(M, y, x, xz, defcons, method, "mp1"))
            }
            else {
                st_numscalar("mp1", .)
            }
        }
        // For fast, these are done in alogit.ado
    }
    return(M)
}

///////////////////////////////////////////////////////////////////////
//                         Exact Likelihood                          //
///////////////////////////////////////////////////////////////////////

// Loglik for exact alogit, matrix version
void function _alogit_exact(transmorphic M,
                            real scalar todo,
                            real rowvector b,
                            fv, g, H)
{

    // Function variables
    real matrix x, xz, info, C, xbmat, xzdmat
    real colvector nj_i, nc_i
    real colvector y, defcons, xb, xzd
    real rowvector bb, gd
    real scalar asexp, kx, kxz, ktot, getdef, nc_0

    // Function helpers
    real matrix xdphi_ij, xphi_ij
    real colvector exb, phi_ij, lphi_c, lphi_nc

    // Loop variables to use for the likelihood
    real scalar i
    real matrix Ci, Ciall, PY
    real colvector y_ij, exb_ij, dc_ij
    real colvector Cisel, lPA
    real rowvector PA, PY_ij

    // Loop variables to use for the gradient
    real colvector dLdP
    real rowvector dLdP_aux, dLdb_k, dLdg_k
    real matrix x_ij, xz_ij, xphi_i
    real matrix dPdb_k_aux, dPdg_k_aux

    // Loop variables to use for the Hessian
    real scalar l, k
    real matrix hb, hg, hbg, haux
    real matrix dPdb_l0, dLdb_kl1, dLdb_kl2, dLdb_kl3, dLdb_kl0, dPdb_kl
    real matrix dPdb_kl_aux, dPdg_lk_aux, dPdbg_lk
    real matrix dPdg_kl0, dLdg_kl

    // Variables to use in the computation
    // -----------------------------------

    C       = moptimize_init_userinfo(M, 1) // Choice sets
    nc_i    = moptimize_init_userinfo(M, 2) // 2^nj_i - 1 (non-empty sets per group)
    nj_i    = moptimize_init_userinfo(M, 3) // Goods per group
    getdef  = moptimize_init_userinfo(M, 4) // Get default or consider
    x       = moptimize_init_userinfo(M, 5) // utility vars
    xz      = moptimize_init_userinfo(M, 6) // attention vars
    info    = moptimize_init_userinfo(M, 7) // pos of each group in data
    asexp   = moptimize_init_userinfo(M, 8) // Estimate exp(b) instead of b
    defcons = moptimize_init_userinfo(M, 9) // default good/consider goods
    y       = moptimize_util_depvar(M, 1)   // outcome (choice)
    kx      = cols(x)
    kxz     = cols(xz)
    ktot    = kx + kxz

    if ( asexp ) {
        bb      = b[|1 \ kx|]       // beta
        gd      = b[|kx + 1 \ .|]   // gamma, delta
        xbmat   = (bb :^ x)         // exp(b)^x, element-wise
        xzdmat  = (gd :^ xz)        // exp(g, d)^(x, z)
        xb      = xbmat[, 1]        // prod(exp(b)^x over kx) = exp(xb)
        xzd     = xzdmat[, 1]       // prod(exp(g, d)^(x, z) over kx + kz) = exp(xg + zd)
        for (k = 2; k <= kx; k++)  xb  = xb  :* xbmat[, k]
        for (k = 2; k <= kxz; k++) xzd = xzd :* xzdmat[, k]
    }
    else {
        xb  = moptimize_util_xb(M, b, 1)  // x * b
        xzd = moptimize_util_xb(M, b, 2)  // x * g + z * d
    }

    phi_ij = asexp? (xzd :/ (1 :+ xzd)): invlogit(xzd) // Get phi for P(attention)
    if (!getdef) phi_ij[selectindex(defcons), ] = J(sum(defcons), 1, 1)

    _editvalue(phi_ij, 1, 1 - epsilon(1)) // Numerical precision foo
    _editvalue(phi_ij, 0, epsilon(1))     // Numerical precision foo

    xphi_ij  = phi_ij :* xz       // Helper for the gradient
    exb      = asexp? xb: exp(xb) // Helper for P(Y)
    lphi_c   = log(phi_ij)        // Helpers for P(A) since Mata
    lphi_nc  = log(1 :- phi_ij)   // does not have a prod function.

    // Helper for the Hessian
    xdphi_ij = ( asexp? (xzd :/ (1 :+ xzd):^2): alogit_logisticden(xzd) ) :* xz
    if (!getdef) xdphi_ij[selectindex(defcons), ] = J(sum(defcons), kxz, 0)

    H  = J(ktot, ktot,  0) // Hessian
    g  = J(1, ktot,  0)    // Gradient
    fv = 0                 // Log likelihood

    // Loop over individuals
    // ---------------------

    for (i = 1; i <= rows(info); i++) {
        // Drop individuals with > 1 or 0 goods chosen
        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) continue
        dc_ij = panelsubmatrix(defcons, i, info)

        // Get choice sets by individual
        nc_0  = getdef? 1: nc_i[i] - 2^(nj_i[i] - sum(dc_ij)) + 2
        Ciall = C[|nc_0, 1 \ nc_i[i] + 1, nj_i[i]|]
        Cisel = selectindex(Ciall[, selectindex(y_ij)])
        Ci    = J(1, nj_i[i], 0) \ Ciall[Cisel, ]
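        // (When consider() goods are specified, the slice starting at nc_0
        // appears to assume those goods are ordered last within the group,
        // so that the kept rows of C are exactly the sets containing every
        // must-consider good; Cisel then keeps the sets containing the
        // chosen good, and an empty-set row is prepended.)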

        // Compute the likelihood
        // ----------------------

        // Probability of each choice set. We want to compute P(Y |
        // c, theta) P(c | theta); we compute P(c | theta) / (total
        // for P(Y)) and then sum over all the choice sets
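        // Concretely, writing phi_ij for P(good j considered by group i),
        //   P(y) = sum over sets C containing y of
        //          [prod_{j in C} phi_ij * prod_{j not in C} (1 - phi_ij)]
        //          * exp(x_y b) / sum_{j in C} exp(x_j b)
        // (plus the empty-set term when a default good is specified and y
        // is the default). lPA below is the log of the bracketed product
        // and PY holds the conditional logit term for each set.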
        lPA    = Ci * panelsubmatrix(lphi_c, i, info) +
                 (1 :- Ci) * panelsubmatrix(lphi_nc, i, info)
        PA     = rowshape(exp(lPA), 1)
        exb_ij = panelsubmatrix(exb, i, info)
        PY     = rowshape(exb_ij, 1) :* (Ci :/ (Ci * exb_ij))

        _editmissing(PY, 0)
        if (getdef) PY[1, selectindex(dc_ij)] = 1
        PY_ij = PA * PY

        // Compute the likelihood
        fv = fv + log(PY_ij) * y_ij

        // Compute the gradient and the Hessian
        // ------------------------------------

        if (todo >= 1) {
            dLdP     = y_ij :/ colshape(PY_ij, 1)
            dLdP_aux = PY * dLdP
            x_ij     = panelsubmatrix(x,  i, info)
            xz_ij    = panelsubmatrix(xz, i, info)
            xphi_i   = panelsubmatrix(xphi_ij, i, info)'

            // Compute the gradient
            dPdb_k_aux = (PY * x_ij)'
            dPdg_k_aux = ((Ci * xz_ij) :- rowshape(rowsum(xphi_i), 1))'

            dLdb_k = PY_ij * (x_ij  :* dLdP) - ((dPdb_k_aux :* PA) * dLdP_aux)'
            dLdg_k = ((PA :* dPdg_k_aux) * dLdP_aux)'
            g = g + (dLdb_k, dLdg_k)

            // Compute the Hessian
            if (todo == 2) {
                hb  = dLdb_k' * dLdb_k
                hg  = dLdg_k' * dLdg_k
                hbg = dLdb_k' * dLdg_k

                // For betas
                for (k = 1; k <= kx; k++) {
                    for (l = k; l <= kx; l++) {
                        dPdb_l0  = PY :* x_ij[, l]' - PY :* (PY * x_ij[, l])
                        dLdb_kl1 = dPdb_l0 :* x_ij[, k]'
                        dLdb_kl2 = dPdb_l0 :* colshape(dPdb_k_aux[k, ], 1)
                        dLdb_kl3 = PY :* (dPdb_l0 * x_ij[, k])
                        dLdb_kl0 = dLdb_kl1 -  dLdb_kl2 - dLdb_kl3
                        dPdb_kl  = (PA * dLdb_kl0) * dLdP
                        H[k, l]  = H[k, l] + dPdb_kl - hb[k, l]
                    }

                    // Cross partials
                    dPdb_kl_aux = PY :* x_ij[, k]' - (PY * x_ij[, k]) :* PY
                    for (l = 1; l <= kxz; l++) {
                        dPdg_lk_aux = rowshape((Ci * xz_ij[, l]) :- sum(xphi_i[l, ]), 1) :* PA
                        dPdbg_lk = (dPdg_lk_aux * dPdb_kl_aux) * dLdP
                        H[k, kx + l] = H[k, kx + l] + dPdbg_lk - hbg[k, l]
                    }
                }

                // For gamma, delta
                haux = uppertriangle(panelsubmatrix(xdphi_ij, i, info)' * xz_ij)
                for (k = 1; k <= kxz; k++) {
                    for (l = k; l <= kxz; l++) {
                        dPdg_kl0   = (dPdg_k_aux[l, ] :* dPdg_k_aux[k, ] :- haux[k, l]) :* PA
                        dLdg_kl    = dPdg_kl0 * dLdP_aux - hg[k, l]
                        H[kx + k, kx + l] = H[kx + k, kx + l] + dLdg_kl
                    }
                }
            }
        }
    }
    if ( todo == 2 ) H = makesymmetric(uppertriangle(H)')
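    // When estimating exp(b), the loop above computed g and H with respect
    // to the underlying coefficients theta = ln(b); the block below maps
    // them to b via the chain rule:
    //   dL/db_k = g_k / b_k
    //   d2L/(db_k db_l) = H_kl / (b_k b_l) - 1{k == l} g_k / b_k^2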
    if ( asexp ) {
        if (todo >= 1) g = g :/ b
        if (todo == 2) H = H :/ (b' * b) :- diag(g :/ b)
    }
}

// Compute the likelihood, gradient, and Hessian in C
void function _alogit_exact_fast(transmorphic M,
                                 real scalar todo,
                                 real rowvector b,
                                 fv, g, H)
{
    real scalar ktot
    ktot = st_numscalar("__alogit_ktot")
    H    = J(ktot, ktot,  0)
    g    = J(1, ktot,  0)
    st_numscalar("__alogit_ll", .)
    st_matrix("__alogit_b", b)
    st_numscalar("__alogit_todo", todo)
    if (moptimize_init_userinfo(M, 1) == "fast") {
        stata("plugin call aloglik_fast " + st_local("fastcall") + " alogit")
    }
    else {
        stata("plugin call aloglik_faster " + st_local("fastcall") + " alogit")
    }
    if (todo >= 1) g = st_matrix("__alogit_g")
    if (todo == 2) H = makesymmetric(uppertriangle(st_matrix("__alogit_H"))')
    fv = st_numscalar("__alogit_ll")
    if ( moptimize_init_userinfo(M, 8) ) {
        if (todo >= 1) g = g :/ b
        if (todo == 2) H = H :/ (b' * b) :- diag(g :/ b)
    }
}

// Loglik for exact alogit, loop version
void function _alogit_exact_loop(transmorphic M,
                                 real scalar todo,
                                 real rowvector b,
                                 fv, g, H)
{

    // Function variables
    real matrix x, xz, info, xbmat, xzdmat
    real colvector y, defcons, xb, xzd, nj_i
    real rowvector bb, gd
    real scalar asexp, kx, kxz, ktot, getdef

    // Function helpers
    real matrix xdphi_ij, xphi_ij
    real colvector exb, phi_ij, lphi_c, lphi_nc

    // Loop variables to use for the likelihood
    real scalar i, k, sel
    real scalar PY_ijc, PY_ij, PA
    real colvector y_ij, exb_ij
    real colvector lphi_c_ij, lphi_nc_ij, PY
    real rowvector c

    // Loop variables to use for the gradient
    real matrix x_ij, xz_ij, xphi_i, PYx_ij_mat
    real rowvector x_ij_sel, PYx_ij, cxz_ij, xphi_isum
    real rowvector dPdb_aux, dPdg_aux
    real rowvector dLdb_k, dLdg_k

    // Loop variables to use for the Hessian
    real matrix hb, hg, hbg, haux, xdphi_i
    real rowvector x_ijPY
    real matrix dPdb_kl_1, dPdb_kl_2, dLdb_aux
    real matrix dPdg_l_aux, dLdbg_aux
    real matrix dLdg_aux

    // Variables to use in the computation
    // -----------------------------------

    // Get choice sets, xb, and xg + zd
    nj_i    = moptimize_init_userinfo(M, 3) // Goods per group
    getdef  = moptimize_init_userinfo(M, 4) // Get default or consider
    x       = moptimize_init_userinfo(M, 5) // utility vars
    xz      = moptimize_init_userinfo(M, 6) // attention vars
    info    = moptimize_init_userinfo(M, 7) // pos of each group in data
    asexp   = moptimize_init_userinfo(M, 8) // Estimate exp(b) instead of b
    defcons = moptimize_init_userinfo(M, 9) // default good/consider goods
    y       = moptimize_util_depvar(M, 1)   // outcome (choice)
    kx      = cols(x)
    kxz     = cols(xz)
    ktot    = kx + kxz

    if ( asexp ) {
        bb      = b[|1 \ kx|]       // beta
        gd      = b[|kx + 1 \ .|]   // gamma, delta
        xbmat   = (bb :^ x)         // exp(b)^x, element-wise
        xzdmat  = (gd :^ xz)        // exp(g, d)^(x, z)
        xb      = xbmat[, 1]        // prod(exp(b)^x over kx) = exp(xb)
        xzd     = xzdmat[, 1]       // prod(exp(g, d)^(x, z) over kx + kz) = exp(xg + zd)
        for (k = 2; k <= kx; k++)  xb  = xb  :* xbmat[, k]
        for (k = 2; k <= kxz; k++) xzd = xzd :* xzdmat[, k]
    }
    else {
        xb  = moptimize_util_xb(M, b, 1)  // x * b
        xzd = moptimize_util_xb(M, b, 2)  // x * g + z * d
    }

    phi_ij = asexp? (xzd :/ (1 :+ xzd)): invlogit(xzd) // Get phi for P(attention)
    if (!getdef) phi_ij[selectindex(defcons), ] = J(sum(defcons), 1, 1)

    _editvalue(phi_ij, 1, 1 - epsilon(1))  // Numerical precision foo
    _editvalue(phi_ij, 0, epsilon(1))      // Numerical precision foo

    xphi_ij  = phi_ij :* xz        // Helper for the gradient
    exb      = asexp? xb: exp(xb)  // Helper for P(Y)
    lphi_c   = log(phi_ij)         // Helper for P(A)
    lphi_nc  = log(1 :- phi_ij)    // Helper for P(A)

    // Helper for the Hessian
    xdphi_ij = ( asexp? (xzd :/ (1 :+ xzd):^2): alogit_logisticden(xzd) ) :* xz
    if (!getdef) xdphi_ij[selectindex(defcons), ] = J(sum(defcons), kxz, 0)

    // Initialize values
    H  = J(ktot, ktot,  0) // Hessian
    g  = J(1, ktot,  0)    // Gradient
    fv = 0                 // Log likelihood

    // Loop over individuals
    // ---------------------

    for (i = 1; i <= rows(info); i++) {

        // Drop individuals with > 1 or 0 goods chosen
        // -------------------------------------------

        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) continue

        lphi_c_ij  = panelsubmatrix(lphi_c, i, info)
        lphi_nc_ij = panelsubmatrix(lphi_nc, i, info)
        exb_ij     = panelsubmatrix(exb, i, info)
        x_ij       = (todo >= 1)? panelsubmatrix(x,  i, info): .
        xz_ij      = (todo >= 1)? panelsubmatrix(xz, i, info): .
        xphi_i     = (todo >= 1)? panelsubmatrix(xphi_ij, i, info)': .
        xdphi_i    = (todo >= 1)? panelsubmatrix(xdphi_ij, i, info): .

        // Initialize P(Y)
        // ---------------

        sel    = selectindex(y_ij)
        if (getdef) {
            // If you specify a default good, start with the empty set
            c  = J(1, nj_i[i], 0)
            PY = panelsubmatrix(defcons, i, info)
        }
        else {
            // Otherwise, start with the first set that includes the
            // choice and the goods that must be considered.
            c  = panelsubmatrix(defcons, i, info)'
            while (c[sel] == 0) _alogit_binadd_reverse(c)
            PY = c' :* exb_ij :/ (c * exb_ij)
        }
        PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
        PY_ijc = PA * PY[sel]
        PY_ij  = PY_ijc

        //  Gradient and Hessian
        // ---------------------

        if (todo >= 1) {
            x_ij_sel = x_ij[sel, ]

            // These are defined this way to use in the Hessian
            PYx_ij_mat = PY :* x_ij
            PYx_ij     = colsum(PYx_ij_mat)
            cxz_ij     = c * xz_ij

            // Gradient helpers
            dPdb_aux = PYx_ij * PY_ijc
            dPdg_aux = cxz_ij * PY_ijc

            // Hessian helpers
            xphi_isum = rowshape(rowsum(xphi_i), 1)
            if (todo >= 2) {
                // For beta
                x_ijPY     = x_ij_sel :- PYx_ij
                dPdb_kl_1  = x_ijPY' * x_ijPY
                dPdb_kl_2  = x_ij' * PYx_ij_mat - PYx_ij' * PYx_ij
                dLdb_aux   = (dPdb_kl_1 - dPdb_kl_2) :* PY_ijc

                // Cross partials
                dPdg_l_aux = cxz_ij :- xphi_isum
                dLdbg_aux  = (x_ijPY' * dPdg_l_aux) :* PY_ijc

                // For gamma, delta
                haux       = uppertriangle(xdphi_i' * xz_ij)
                dLdg_aux   = (dPdg_l_aux' * dPdg_l_aux) :* PY_ijc
            }
        }

        // Loop through all consideration sets
        // -----------------------------------

        while (!min(c)) {
             _alogit_binadd_reverse(c)
            if (c[sel] == 0) continue

            // Contribute to P(Y)
            // ------------------

            PY     = c' :* exb_ij :/ (c * exb_ij)
            PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
            PY_ijc = PA * PY[sel]
            PY_ij  = PY_ij + PY_ijc

            // Gradient and Hessian
            // --------------------

            if (todo >= 1) {

                // Defined this way to use in the Hessian
                PYx_ij_mat = PY :* x_ij
                PYx_ij     = colsum(PYx_ij_mat)
                cxz_ij     = c * xz_ij

                // Gradient helpers
                dPdb_aux = dPdb_aux + PYx_ij * PY_ijc
                dPdg_aux = dPdg_aux + cxz_ij * PY_ijc

                // Hessian
                if (todo == 2) {
                    // For beta
                    x_ijPY     = x_ij_sel :- PYx_ij
                    dPdb_kl_1  = x_ijPY' * x_ijPY
                    dPdb_kl_2  = x_ij' * PYx_ij_mat - PYx_ij' * PYx_ij
                    dLdb_aux   = dLdb_aux + (dPdb_kl_1 - dPdb_kl_2) :* PY_ijc

                    // Cross partials
                    dPdg_l_aux = cxz_ij :- xphi_isum
                    dLdbg_aux  = dLdbg_aux + (x_ijPY' * dPdg_l_aux) :* PY_ijc

                    // For gamma
                    dLdg_aux   = dLdg_aux + (dPdg_l_aux' * dPdg_l_aux) :* PY_ijc
                }
            }
        }

        // Compute the likelihood
        // ----------------------

        fv = fv + log(PY_ij)

        //  Gradient and Hessian
        // ---------------------

        if (todo >= 1) {
            dLdb_k = x_ij_sel :- dPdb_aux :/ PY_ij
            dLdg_k = dPdg_aux :/ PY_ij :- xphi_isum

            g = g + (dLdb_k, dLdg_k)

            // Hessian
            if (todo == 2) {
                hb  = (dLdb_aux  / PY_ij) :- dLdb_k' * dLdb_k
                hbg = (dLdbg_aux / PY_ij) :- dLdb_k' * dLdg_k
                hg  = (dLdg_aux  / PY_ij) :- dLdg_k' * dLdg_k :- haux
                H   = H :+ ((hb, hbg) \ (hbg', hg))
            }
        }

    }
    if (todo == 2) H = makesymmetric(uppertriangle(H)')
    if ( asexp ) {
        if (todo >= 1) g = g :/ b
        if (todo == 2) H = H :/ (b' * b) :- diag(g :/ b)
    }
}

///////////////////////////////////////////////////////////////////////
//                         Helper Functions                          //
///////////////////////////////////////////////////////////////////////

// Logistic density (prior to Stata 14, this was not provided by Mata)
real function alogit_logisticden(real x)
{
    return(invlogit(x) :/ (1 :+ exp(x)))
}
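
// Note: invlogit(x) :/ (1 :+ exp(x)) equals exp(x) :/ (1 :+ exp(x)):^2, i.e.
// invlogit(x) :* (1 :- invlogit(x)); for example, alogit_logisticden(0) = 0.25.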

// All the choice sets of nj products in one matrix
real matrix function alogit_combos(real scalar nj)
{
    real scalar nc, i, j
    real matrix C
    string scalar z
    nc = 2^nj
    C  = J(nc, nj, 0)
    for (j = 1; j < nc; j++) {
        z = strreverse(inbase(2, j))
        for (i = 1; i <= strlen(z); i++) {
            C[j + 1, i] = strtoreal(substr(z, i, 1))
        }
    }
    return(C)
}
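
// Example (for illustration): alogit_combos(2) returns
//     0  0
//     1  0
//     0  1
//     1  1
// Row r encodes r - 1 in binary with the least-significant bit first, so
// each row is one candidate consideration set over the nj goods.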

// Add one to a vector representing a binary number in reverse order
// (i.e. the first entry is the 1s place and the last is the 2^(J-1)s
// place). In testing, this was faster than incrementing a vector stored
// with the most-significant bit first.
void function _alogit_binadd_reverse(real rowvector c)
{
    real scalar i
    i = 1
    while (c[i]) {
        c[i] = 0
        i++
    }
    c[i] = 1
}
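
// Example (for illustration): starting from c = (0, 0, 0), successive calls
// give (1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 0, 1), ..., (1, 1, 1), i.e. the
// rows of alogit_combos(3) after the first.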
end
///////////////////////////////////////////////////////////////////////
//                        Alternative Models                         //
///////////////////////////////////////////////////////////////////////

// capture mata: mata drop _alogit_importance()

mata

// Loglik for importance sampler, only one loop
void function _alogit_importance(transmorphic M,
                                 real scalar todo,
                                 real rowvector b,
                                 fv, g, H)
{
    // Function variables
    real matrix x, xz, info, C, PC0
    real colvector y, def, xb, xzd
    real scalar R, kx, kxz, ktot

    // Function helpers
    real matrix xdphi_ij, xphi_ij
    real colvector exb, phi_ij, lphi_c, lphi_nc

    // Loop variables to use for the likelihood
    real scalar i
    string scalar err_noprob
    real matrix Ci, PY
    real colvector y_ij, exb_ij, d_ij
    real colvector lPA
    real rowvector PAPC0, PY_ij, lPY_ij, ll

    // Loop variables to use for the gradient
    real colvector dLdP
    real rowvector dPdb_k, dLdb_k, dPdg_k
    real matrix x_ij, xz_ij, xphi_i
    real matrix dPdb_k_aux, dPdg_k_aux

    // Loop variables to use for the Hessian
    real scalar l, k
    real matrix haux
    real matrix dPdb_l0, dLdb_kl1, dLdb_kl2, dLdb_kl3, dLdb_kl0, dPdb_kl, dLdbg_kl
    real matrix dPdb_kl_aux, dPdg_lk_aux, dPdbg_lk, dPdg_l
    real matrix dPdg_l_aux, dPdg_kl0, dLdg_kl, dPdg_l0

    // Variables to use in the computation
    // -----------------------------------

    // Get choice sets, xb, and xg + zd
    C      = moptimize_init_userinfo(M, 1) // Choice sets
    PC0    = moptimize_init_userinfo(M, 2) // Importance weights
    R      = moptimize_init_userinfo(M, 3) // Repetitions
    x      = moptimize_init_userinfo(M, 5) // utility vars
    xz     = moptimize_init_userinfo(M, 6) // attention vars
    info   = moptimize_init_userinfo(M, 7) // pos of each group in data
    def    = moptimize_init_userinfo(M, 9) // default good
    xb     = moptimize_util_xb(M, b, 1)    // x * b
    xzd    = moptimize_util_xb(M, b, 2)    // x * g + z * d
    y      = moptimize_util_depvar(M, 1)   // outcome (choice)
    kx     = cols(x)
    kxz    = cols(xz)
    ktot   = kx + kxz

    phi_ij   = invlogit(xzd)              // Get phi for P(attention)
    _editvalue(phi_ij, 1, 1 - epsilon(1)) // Numerical precision foo
    _editvalue(phi_ij, 0, epsilon(1))     // Numerical precision foo

    xdphi_ij = alogit_logisticden(xzd) :* xz // Helper for the Hessian
    xphi_ij  = phi_ij :* xz                  // Helper for the gradient
    exb      = exp(xb)                       // Helper for P(Y)
    lphi_c   = log(phi_ij)                   // Helpers for P(A) since Mata
    lphi_nc  = log(1 :- phi_ij)              // does not have a prod function.

    H  = J(ktot, ktot,  0) // Hessian
    g  = J(1, ktot,  0)    // Gradient
    fv = 0                 // Log likelihood

    // Loop over individuals
    // ---------------------

    for (i = 1; i <= rows(info); i++) {
        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) continue
        d_ij = selectindex(panelsubmatrix(def, i, info))

        // Grab the sampled choice sets
        Ci = panelsubmatrix(C, i, info)'

        // Compute the likelihood
        // ----------------------

        // Probability of each choice set. We want to compute P(Y |
        // c, theta) P(c | theta); we compute P(c | theta) / (total
        // for P(Y)) and then sum over all the choice sets
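        // With importance sampling the sum over all sets is replaced by an
        // average over the R sampled sets:
        //   P(y) ~ (1/R) * sum_r P(y | C_r, b) * P(C_r | g, d) / P(C_r | g0, d0)
        // where PC0[i, ] holds P(C_r | g0, d0) evaluated at the starting
        // values used to draw the sets (the division by R happens below).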
        lPA    = Ci * panelsubmatrix(lphi_c, i, info) +
                 (1 :- Ci) * panelsubmatrix(lphi_nc, i, info)
        PAPC0  = rowshape(exp(lPA), 1) :/ PC0[i, ]
        exb_ij = panelsubmatrix(exb, i, info)
        PY     = rowshape(exb_ij, 1) :* (Ci :/ (Ci * exb_ij))
        _editmissing(PY, 0)
        _editmissing(PAPC0, 0)
        PY_ij  = PAPC0 * PY

        if (st_local("noprob") == "error") {
            if (sum(!(PY_ij :> 0)) > 0) {
                err_noprob = "The sampler returned probabilities = 0\n" +
                "To fix this, try\n" +
                "- Increasing 'reps'\n" +
                "- Run with -noprob(drop)- to force 0s in the likelihood.\n"
                errprintf(err_noprob)
                exit()
            }
        }

        // Compute the likelihood
        lPY_ij       = PY_ij / R
        lPY_ij[d_ij] = lPY_ij[d_ij]
        ll = log(lPY_ij)
        _editmissing(ll, 0)
        fv = fv + ll * y_ij

        // Figure out if none of the goods were chosen, and the prob of that
        if(sum(y_ij) == 0) {
            fv = fv + sum(panelsubmatrix(lphi_nc, i, info))
        }

        // Compute the gradient and the Hessian
        // ------------------------------------

        if (todo >= 1) {
            dLdP   = y_ij :/ colshape(PY_ij, 1)
            x_ij   = panelsubmatrix(x,  i, info)
            xz_ij  = panelsubmatrix(xz, i, info)
            xphi_i = panelsubmatrix(xphi_ij, i, info)'
            _editmissing(dLdP, 0)

            // For betas
            for (k = 1; k <= kx; k++) {
                dPdb_k_aux = PY * x_ij[, k]
                dPdb_k = PY_ij :* x_ij[, k]' - ((dPdb_k_aux' :* PAPC0) * PY)
                dLdb_k = dPdb_k * dLdP
                g[, k] = g[, k] + dLdb_k

                if (todo == 2) {
                    for (l = k; l <= kx; l++) {
                        dPdb_l0  = PY :* x_ij[, l]' - PY :* (PY * x_ij[, l])
                        dLdb_kl1 = dPdb_l0 :* x_ij[, k]'
                        dLdb_kl2 = dPdb_l0 :* dPdb_k_aux
                        dLdb_kl3 = PY :* (dPdb_l0 * x_ij[, k])
                        dLdb_kl0 = dLdb_kl1 -  dLdb_kl2 - dLdb_kl3
                        dPdb_kl  = ((PAPC0 * dLdb_kl0) * dLdP)
                        dLdbg_kl = (dPdb_k :* (PAPC0 * dPdb_l0)) * (dLdP:^2)
                        H[k, l]  = H[k, l] + dPdb_kl - dLdbg_kl
                    }

                    // Cross partials
                    dPdb_kl_aux = PY :* x_ij[, k]' - (PY * x_ij[, k]) :* PY
                    for (l = 1; l <= kxz; l++) {
                        dPdg_lk_aux = rowshape((Ci * xz_ij[, l]) :- sum(xphi_i[l, ]), 1) :* PAPC0
                        dPdbg_lk = (dPdg_lk_aux * dPdb_kl_aux) * dLdP
                        dPdg_l   =  dPdg_lk_aux * PY
                        H[k, kx + l] = H[k, kx + l] + dPdbg_lk - (dPdb_k :* dPdg_l) * (dLdP:^2)
                    }
                }
            }

            // For gamma, delta
            haux = uppertriangle(panelsubmatrix(xdphi_ij, i, info)' * xz_ij)
            for (k = 1; k <= kxz; k++) {
                dPdg_k_aux = rowshape((Ci * xz_ij[, k]) :- sum(xphi_i[k, ]), 1)
                dPdg_k = (dPdg_k_aux :* PAPC0) * PY
                g[, kx + k] = g[, kx + k] + dPdg_k * dLdP

                if (todo == 2) {
                    for (l = k; l <= kxz; l++) {
                        dPdg_l_aux = rowshape((Ci * xz_ij[, l]) :- sum(xphi_i[l, ]), 1)
                        dPdg_kl0 = (dPdg_l_aux :* dPdg_k_aux :- haux[k, l]) :* PAPC0
                        dPdg_l0  = (dPdg_l_aux :* PAPC0) * PY
                        dLdg_kl  = (dPdg_kl0 * PY) * dLdP - (dPdg_k :* dPdg_l0) * (dLdP:^2)
                        H[kx + k, kx + l] = H[kx + k, kx + l] + dLdg_kl
                    }
                }
            }
            if(sum(y_ij) == 0) {
                g = g - (J(1, kx, 0), rowsum(xphi_i)')
                if (todo == 2) {
                    H[|kx + 1, kx + 1 \ ., .|] = H[|kx + 1, kx + 1 \ ., .|] - haux
                }
            }
        }
    }
    if (todo == 2) H = makesymmetric(uppertriangle(H)')
}
end
///////////////////////////////////////////////////////////////////////
//                     alogit prediction helpers                     //
///////////////////////////////////////////////////////////////////////

// capture mata: mata drop alogit_predict()
// capture mata: mata drop alogit_counterfactual()
// capture mata: mata drop alogit_predict_loop()
// capture mata: mata drop alogit_counterfactual_loop()

mata

// Prediction (various model fits, elasticity, etc.)
real function alogit_predict(transmorphic M,
                             real matrix y,
                             real matrix x,
                             real matrix xz,
                             real matrix defcons,
                             string scalar method,
                             string scalar predict,
                             | real rowvector b)
{

    // Function variables
    real matrix info, C
    real colvector select, nj_i, nc_i
    real colvector xb, xzd
    real scalar kx, getdef, nc_0, ipos, apos, db, dg, R, dydx, nj
    string scalar afit
    real rowvector b0

    // Function helpers
    real colvector exb, phi_ij, lphi_c, lphi_nc, phi0, Pc0
    real matrix PC0

    // Loop variables to use for the likelihood
    real scalar i, p0
    real matrix Ci, PY
    real colvector y_ij, exb_ij, dc_ij, phi_i
    real colvector lPA, dPYPC
    real rowvector PA, PY_ij, PAPC0, ll, dPY_ij, lPY_ij

    // Variables to return
    real scalar mp0, loglik
    real colvector P_ij, PYC_ij, P0_i, dP_ij

    // Variables to use in the computation
    // -----------------------------------

    moptimize_init_touse(M, st_local("touse"))
    moptimize_init_by(M, st_local("group"))

    info   = panelsetup(*moptimize_util_by(M), 1)
    select = selectindex(st_data(., st_local("touse"), st_local("e(sample)")))

    // Parse vector to use
    if (args() == 7) {
        afit = st_local("afit")
        b0   = moptimize_init_eq_coefs(M, 1), moptimize_init_eq_coefs(M, 2)
        b    = (afit == "loglik")? b0: moptimize_result_coefs(M)
    }
    kx = cols(x)
    if ( moptimize_init_userinfo(M, 8) ) b = log(b)

    // For derivatives, elasticities
    dydx = (predict == "dpycdpc") | (predict == "dpyc") | (predict == "dpc")
    if (dydx) {
        ipos = strtoreal(st_local("ipos"))
        apos = strtoreal(st_local("apos"))
        db   = (ipos == 0)? 1 : b[ipos]
        dg   = (apos == 0)? 1 : b[kx + apos]
    }

    // Linear predictions
    xb  = x  * b[|1 \ kx|]'
    if (predict == "u") return(xb)
    xzd = xz * b[|kx + 1 \ .|]'
    if (predict == "a") return(xzd)

    // P(Attention)
    getdef = (st_local("e(default)") != "") & (st_local("e(consider)") == "")
    getdef = getdef | ((st_local("default") != "") & (st_local("consider") == ""))
    phi_ij = invlogit(xzd)
    if (!getdef) phi_ij[selectindex(defcons), ] = J(sum(defcons), 1, 1)
    if (predict == "pa") return(phi_ij)

    // Helpers for loop
    _editvalue(phi_ij, 1, 1 - epsilon(1))
    _editvalue(phi_ij, 0, epsilon(1))
    exb      = exp(xb)
    lphi_c   = log(phi_ij)
    lphi_nc  = log(1 :- phi_ij)

    // Various parsing for different methods
    if (method == "importance") {
        R    = moptimize_init_userinfo(M, 3)
        phi0 = invlogit(xz * moptimize_init_eq_coefs(M, 2)')
        C    = phi0 :> moptimize_init_userinfo(M, 4)[select, ]
        Pc0  = C :* log(phi0) :+ (1 :- C) :* log(1 :- phi0)
        PC0  = J(rows(info), R, .)
        for (i = 1; i <= rows(info); i++) {
            PC0[i, ] = exp(colsum(panelsubmatrix(Pc0, i, info)))
        }
    }
    else {
        nj   = panelstats(info)[4]
        nj_i = info[, 2] :- info[, 1] :+ 1
        nc_i = 2:^nj_i :- 1
        C    = alogit_combos(nj)
    }

    loglik = mp0 = 0
    P_ij   = PYC_ij = P0_i = dP_ij = J(rows(y), 1, .)
    for (i = 1; i <= rows(info); i++) {
        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) {
            // if (sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // if (sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }

        exb_ij = panelsubmatrix(exb, i, info)
        PYC_ij[|info[i, ]'|] = exb_ij / sum(exb_ij)
        if (predict == "pyc") continue

        p0  = exp(sum(panelsubmatrix(lphi_nc, i, info)))
        mp0 = mp0 + p0
        if (predict == "mp0") continue
        P0_i[|info[i, ]'|] = J(length(y_ij), 1, p0)
        if (predict == "p0") continue

        dc_ij  = panelsubmatrix(defcons, i, info)
        if (method == "exact") {
            if (getdef) {
                Ci = C[|1, 1 \ nc_i[i] + 1, nj_i[i]|]
            }
            else {
                nc_0 = nc_i[i] - 2^(nj_i[i] - sum(dc_ij)) + 2
                Ci = J(1, nj_i[i], 0) \ C[|nc_0, 1 \ nc_i[i] + 1, nj_i[i]|]
            }
        }
        else if (method == "importance") {
            Ci = panelsubmatrix(C, i, info)'
        }
        lPA    = Ci * panelsubmatrix(lphi_c, i, info) +
                 (1 :- Ci) * panelsubmatrix(lphi_nc, i, info)
        PA     = rowshape(exp(lPA), 1)
        PY     = rowshape(exb_ij, 1) :* (Ci :/ (Ci * exb_ij))
        _editmissing(PY, 0)
        if (getdef & (method == "exact")) PY[1, selectindex(dc_ij)] = 1

        ///////////////
        //  Margins  //
        ///////////////
        if (predict == "dpycdpc") {
            phi_i = rowshape(panelsubmatrix(phi_ij, i, info), 1)
            dPYPC = (1 :- PY) :* db :+ (Ci :- phi_i) :* dg
        }
        else if (predict == "dpyc") {
            dPYPC = (1 :- PY) :* db
        }
        else if (predict == "dpc") {
            phi_i = rowshape(panelsubmatrix(phi_ij, i, info), 1)
            dPYPC = (Ci :- phi_i) :* dg
        }
        else {
            dPYPC = 1
        }
        ///////////////
        //  Margins  //
        ///////////////

        if (method == "exact") {
            PY_ij  = PA * PY
            dPY_ij = PA * (PY :* dPYPC)
            ll     = log(PY_ij)
        }
        else if (method == "importance")  {
            PAPC0  = PA :/ PC0[i, ]
            _editmissing(PAPC0, 0)
            PY_ij  = (PAPC0 * PY) / R
            dPY_ij = (PAPC0 * (PY :* dPYPC)) / R
            ll     = log(PY_ij)
            _editmissing(ll, 0)
        }

        loglik = loglik + ll * y_ij
        P_ij[|info[i, ]'|]  = colshape(PY_ij, 1)
        dP_ij[|info[i, ]'|] = colshape(dPY_ij, 1)
    }
    mp0 = mp0 / (i - 1)

         if (predict == "py")  return(P_ij)
    else if (predict == "pyc") return(PYC_ij)
    else if (predict == "ll")  return(loglik)
    else if (predict == "p0")  return(P0_i)
    else if (predict == "mp0") return(mp0)
    else if (dydx)             return(dP_ij)
}

// Counterfactual (user-provided utility and/or P(A))
real function alogit_counterfactual(real colvector u,
                                    real colvector phi,
                                    real colvector defcons,
                                    real matrix info)
{
    real scalar i, nj, getdef, nc_0 
    real matrix C, Ci, PY
    real colvector P_ij, select, nj_i, nc_i
    real colvector exb, lphi_c, lphi_nc
    real colvector u_ij, y_ij, exb_ij, dc_ij, lPA
    real rowvector PA, PY_ij

    getdef  = (st_local("e(default)") != "") & (st_local("e(consider)") == "")
    getdef  = getdef | ((st_local("default") != "") & (st_local("consider") == ""))
    if (!getdef) phi[selectindex(defcons), ] = J(sum(defcons), 1, 1)
    _editvalue(phi, 1, 1 - epsilon(1))
    _editvalue(phi, 0, epsilon(1))

    select  = selectindex(st_data(., st_local("touse")))
    nj      = panelstats(info)[4]
    nj_i    = info[, 2] :- info[, 1] :+ 1
    nc_i    = 2:^nj_i :- 1
    C       = alogit_combos(nj)
    exb     = exp(u)
    lphi_c  = log(phi)
    lphi_nc = log(1 :- phi)
    P_ij    = J(rows(u), 1, .)

    for (i = 1; i <= rows(info); i++) {
        if(!anyof(select, info[i, 1]) & !anyof(select, info[i, 2])) continue
        u_ij  = panelsubmatrix(u, i, info)
        y_ij  = max(u_ij) :== u_ij
        dc_ij = panelsubmatrix(defcons, i, info)
        if (getdef) {
            Ci = C[|1, 1 \ nc_i[i] + 1, nj_i[i]|]
        }
        else {
            nc_0 = nc_i[i] - 2^(nj_i[i] - sum(dc_ij)) + 2
            Ci = C[|nc_0, 1 \ nc_i[i] + 1, nj_i[i]|]
        }

        if(sum(y_ij) != 1) {
            // if(sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // else if(sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }

        lPA    = Ci * panelsubmatrix(lphi_c, i, info) +
                 (1 :- Ci) * panelsubmatrix(lphi_nc, i, info)
        PA     = rowshape(exp(lPA), 1)
        exb_ij = panelsubmatrix(exb, i, info)
        PY     = rowshape(exb_ij, 1) :* (Ci :/ (Ci * exb_ij))
        _editmissing(PY, 0)
        if (getdef) PY[1, selectindex(dc_ij)] = 1
        PY_ij  = PA * PY
        P_ij[|info[i, ]'|] = colshape(PY_ij, 1)
    }
    return(P_ij)
}

// Predict using loop algorithm
real function alogit_predict_loop(transmorphic M,
                                  real matrix y,
                                  real matrix x,
                                  real matrix xz,
                                  real matrix defcons,
                                  string scalar method,
                                  string scalar predict,
                                  | real rowvector b)
{
    // Function variables
    real matrix info
    real colvector nj_i, xb, xzd
    real scalar kx, getdef, ipos, apos, db, dg, dydx
    string scalar afit
    real rowvector b0

    // Function helpers
    real colvector exb, phi_ij, lphi_c, lphi_nc

    // Loop variables to use for the likelihood
    real scalar i, sel, PA, p0
    real colvector y_ij, exb_ij, phi_i, lphi_c_ij, lphi_nc_ij
    real colvector dPYPC, PY, PY_ijc, PY_ij
    real rowvector c, dPY_ij

    // Variables to return
    real scalar mp0, loglik
    real colvector P_ij, PYC_ij, P0_i, dP_ij

    if (method != "exact") {
        errprintf("-algorithm(loop)- only available for -method(exact)-")
        exit()
    }

    // Variables to use in the computation
    // -----------------------------------

    // Re-initialize info, group if user changed them
    moptimize_init_touse(M, st_local("touse"))
    moptimize_init_by(M, st_local("group"))

    info = panelsetup(*moptimize_util_by(M), 1)
    nj_i = info[, 2] :- info[, 1] :+ 1

    // Use user coefficients if passed
    if (args() == 7) {
        afit = st_local("afit")
        b0   = moptimize_init_eq_coefs(M, 1), moptimize_init_eq_coefs(M, 2)
        b    = (afit == "loglik")? b0: moptimize_result_coefs(M)
    }
    kx  = cols(x)
    if ( moptimize_init_userinfo(M, 8) ) b = log(b)

    // Helper for derivatives/elasticities
    dydx = (predict == "dpycdpc") | (predict == "dpyc") | (predict == "dpc")
    if (dydx) {
        ipos = strtoreal(st_local("ipos"))
        apos = strtoreal(st_local("apos"))
        db   = (ipos == 0)? 1 : b[ipos]
        dg   = (apos == 0)? 1 : b[kx + apos]
    }

    // New fit!
    xb  = x  * b[|1 \ kx|]'
    if (predict == "u") return(xb)
    xzd = xz * b[|kx + 1 \ .|]'
    if (predict == "a") return(xzd)
    phi_ij = invlogit(xzd)
    getdef = (st_local("e(default)") != "") & (st_local("e(consider)") == "")
    getdef = getdef | ((st_local("default") != "") & (st_local("consider") == ""))
    if (!getdef) phi_ij[selectindex(defcons), ] = J(sum(defcons), 1, 1)
    if (predict == "pa") return(phi_ij)

    // For the likelihood, P(A), P(Y)
    _editvalue(phi_ij, 1, 1 - epsilon(1))  // Numerical precision foo
    _editvalue(phi_ij, 0, epsilon(1))      // Numerical precision foo
    exb      = exp(xb)                     // Helper for P(Y)
    lphi_c   = log(phi_ij)                 // Helper for P(A)
    lphi_nc  = log(1 :- phi_ij)            // Helper for P(A)

    // Loop through individuals
    loglik = mp0 = 0
    P_ij   = PYC_ij = P0_i = dP_ij = J(rows(y), 1, .)
    for (i = 1; i <= rows(info); i++) {

        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) {
            // if(sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // else if(sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }

        exb_ij = panelsubmatrix(exb, i, info)
        PYC_ij[|info[i, ]'|] = exb_ij / sum(exb_ij)
        if (predict == "pyc") continue

        lphi_c_ij  = panelsubmatrix(lphi_c, i, info)
        lphi_nc_ij = panelsubmatrix(lphi_nc, i, info)

        p0  = exp(sum(lphi_nc_ij))
        mp0 = mp0 + p0
        if (predict == "mp0") continue
        P0_i[|info[i, ]'|] = J(length(y_ij), 1, p0)
        if (predict == "p0") continue

        // Initialize P(Y)
        // ---------------

        sel    = selectindex(y_ij)
        if (getdef) {
            // If you specify a default good, start with the empty set
            c  = J(1, nj_i[i], 0)
            PY = panelsubmatrix(defcons, i, info)
        }
        else {
            // Otherwise, start with the first set that includes the
            // goods that must be considered.
            c  = panelsubmatrix(defcons, i, info)'
            PY = c' :* exb_ij :/ (c * exb_ij)
        }
        PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
        PY_ijc = PA :* PY
        PY_ij  = PA * PY[sel]

        ///////////////
        //  Margins  //
        ///////////////
        if (predict == "dpycdpc") {
            phi_i = panelsubmatrix(phi_ij, i, info)
            dPYPC = (1 :- PY) :* db :+ (c' :- phi_i) :* dg
        }
        else if (predict == "dpyc") {
            dPYPC = (1 :- PY) :* db
        }
        else if (predict == "dpc") {
            phi_i = panelsubmatrix(phi_ij, i, info)
            dPYPC = (c' :- phi_i) :* dg
        }
        else {
            dPYPC = 1
        }
        dPY_ij = PA :* PY :* dPYPC
        ///////////////
        //  Margins  //
        ///////////////

        while (!min(c)) {
             _alogit_binadd_reverse(c)
            PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
            PY     = c' :* exb_ij :/ (c * exb_ij)
            PY_ijc = PY_ijc :+ PA :* PY
            PY_ij  = PY_ij + PA * PY[sel]
            ///////////////
            //  Margins  //
            ///////////////
            if (predict == "dpycdpc") {
                phi_i = panelsubmatrix(phi_ij, i, info)
                dPYPC = (1 :- PY) :* db :+ (c' :- phi_i) :* dg
            }
            else if (predict == "dpyc") {
                dPYPC = (1 :- PY) :* db
            }
            else if (predict == "dpc") {
                phi_i = panelsubmatrix(phi_ij, i, info)
                dPYPC = (c' :- phi_i) :* dg
            }
            else {
                dPYPC = 1
            }
            dPY_ij = dPY_ij :+ PA :* (PY :* dPYPC)
            ///////////////
            //  Margins  //
            ///////////////
        }

        loglik = loglik + log(PY_ij)
        P_ij[|info[i, ]'|]  = PY_ijc
        dP_ij[|info[i, ]'|] = dPY_ij
    }
    mp0 = mp0 / (i - 1)

         if (predict == "py")  return(P_ij)
    else if (predict == "pyc") return(PYC_ij)
    else if (predict == "ll")  return(loglik)
    else if (predict == "p0")  return(P0_i)
    else if (predict == "mp0") return(mp0)
    else if (dydx)             return(dP_ij)
}



// Counterfactual using loop algorithm
real colvector function alogit_counterfactual_loop(real colvector u,
                                                   real colvector phi,
                                                   real colvector defcons,
                                                   real matrix info)
{
    real scalar i, getdef, sel, PA
    real colvector P_ij, select, nj_i, PY, PY_ijc
    real colvector exb, lphi_c, lphi_nc, lphi_c_ij, lphi_nc_ij
    real colvector u_ij, y_ij, exb_ij
    real rowvector c

    getdef  = (st_local("e(default)") != "") & (st_local("e(consider)") == "")
    getdef  = getdef | ((st_local("default") != "") & (st_local("consider") == ""))
    if (!getdef) phi[selectindex(defcons), ] = J(sum(defcons), 1, 1)
    _editvalue(phi, 1, 1 - epsilon(1))
    _editvalue(phi, 0, epsilon(1))

    select  = selectindex(st_data(., st_local("touse")))
    nj_i    = info[, 2] :- info[, 1] :+ 1
    exb     = exp(u)
    lphi_c  = log(phi)
    lphi_nc = log(1 :- phi)
    P_ij    = J(rows(u), 1, .)

    for (i = 1; i <= rows(info); i++) {
        if(!anyof(select, info[i, 1]) & !anyof(select, info[i, 2])) continue
        u_ij = panelsubmatrix(u, i, info)
        y_ij = max(u_ij) :== u_ij

        if(sum(y_ij) != 1) {
            // if(sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // else if(sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }

        lphi_c_ij  = panelsubmatrix(lphi_c, i, info)
        lphi_nc_ij = panelsubmatrix(lphi_nc, i, info)
        exb_ij     = panelsubmatrix(exb, i, info)

        // Initialize P(Y)
        // ---------------

        sel    = selectindex(y_ij)
        if (getdef) {
            // If you specify a default good, start with the empty set
            c  = J(1, nj_i[i], 0)
            PY = panelsubmatrix(defcons, i, info)
        }
        else {
            // Otherwise, start with the first set that includes the
            // goods that must be considered.
            c  = panelsubmatrix(defcons, i, info)'
            PY = c' :* exb_ij :/ (c * exb_ij)
        }
        PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
        PY_ijc = PA :* PY

        // Loop through all consideration sets
        // -----------------------------------

        while (!min(c)) {
             _alogit_binadd_reverse(c)
            PA     = exp(c * lphi_c_ij + (1 :- c) * lphi_nc_ij)
            PY     = c' :* exb_ij :/ (c * exb_ij)
            PY_ijc = PY_ijc :+ PA :* PY
        }

        P_ij[|info[i, ]'|] = PY_ijc
    }
    return(P_ij)
}
end
///////////////////////////////////////////////////////////////////////
//                           DSC functions                           //
///////////////////////////////////////////////////////////////////////

// capture mata: mata drop _alogit_dsc_exact()
// capture mata: mata drop _alogit_dsc_exact_fast()
// capture mata: mata drop alogit_dsc_predict()
// capture mata: mata drop alogit_dsc_counterfactual()

mata

// Loglik for DSC model, only one loop
void function _alogit_dsc_exact(transmorphic M,
                                real scalar todo,
                                real rowvector b,
                                fv, g, H)
{
    // Function variables
    real matrix x, xz, info, xbmat, xzdmat
    real colvector y, def, xb, xzd, d_sel
    real rowvector bb, gd
    real scalar asexp, kx, kxz, ktot

    // Function helpers
    real matrix xdphi_ij
    real colvector exb, pa_ij, pi_ij

    // Loop variables to use for the likelihood
    real scalar i, k, sel, PA
    real colvector y_ij, exb_ij, d_ij, PY, PY_ij, P_ij

    // Loop variables to use for the gradient
    real scalar index, dLdP
    real rowvector dLdb_k, dLdg_k, dLdb_k_aux
    real matrix x_ij, xz_ij, xdphi_i

    // Loop variables to use for the Hessian
    real rowvector PYx_ij
    real matrix hb, hg, hbg
    real matrix dPdb_kl1_1, dPdb_kl2_1, dPdb_kl2_2, dPdb_kl
    real matrix dPdbg_kl
    real matrix dPdgd_aux, dPdgd_kl

    // Variables to use in the computation
    // -----------------------------------

    // Get choice sets, xb, and xg + zd
    x     = moptimize_init_userinfo(M, 5) // utility vars
    xz    = moptimize_init_userinfo(M, 6) // attention vars
    info  = moptimize_init_userinfo(M, 7) // pos of each group in data
    asexp = moptimize_init_userinfo(M, 8) // Estimate exp(b) instead of b
    def   = moptimize_init_userinfo(M, 9) // default good
    y     = moptimize_util_depvar(M, 1)   // outcome (choice)
    kx    = cols(x)
    kxz   = cols(xz)
    ktot  = kx + kxz

    if ( asexp ) {
        bb      = b[|1 \ kx|]      // beta
        gd      = b[|kx + 1 \ .|]  // gamma, delta
        xbmat   = (bb :^ x)        // exp(b)^x, element-wise
        xzdmat  = (gd :^ xz)       // exp(g, d)^(x, z)
        xb      = xbmat[, 1]       // prod(exp(b)^x over kx) = exp(xb)
        xzd     = xzdmat[, 1]      // prod(exp(g, d)^(x, z) over kx + kz) = exp(xg + zd)
        for (k = 2; k <= kx; k++)  xb  = xb  :* xbmat[, k]
        for (k = 2; k <= kxz; k++) xzd = xzd :* xzdmat[, k]
    }
    else {
        xb  = moptimize_util_xb(M, b, 1) // x * b
        xzd = moptimize_util_xb(M, b, 2) // x * g + z * d
    }

    d_sel = selectindex(def)  // Index of default goods
    xzd   = xzd[d_sel]        // P(A) only depends on default good

    // Get phi for P(attention)
    pa_ij = asexp? (xzd :/ (1 :+ xzd)): invlogit(xzd)

    _editvalue(pa_ij, 1, 1 - epsilon(1)) // Keep P(attention) strictly below 1
    _editvalue(pa_ij, 0, epsilon(1))     // Keep P(attention) strictly above 0

    pi_ij = 1 :- pa_ij         // Get phi for P(inattention)
    exb   = asexp? xb: exp(xb) // Helper for P(Y)

    // Helpers for the Hessian, gradient
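    // xdphi_ij stacks, for each group's default good, the derivative of the
    // attention probability with respect to the attention index times the
    // attention regressors: phi'(x*g + z*delta) :* xz. It enters every
    // gradient and Hessian term involving (g, delta).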
    xdphi_ij = ( asexp? (xzd :/ (1 :+ xzd):^2): alogit_logisticden(xzd) ) :* xz[d_sel, ]

    H  = J(ktot, ktot,  0)  // Hessian
    g  = J(1, ktot,  0)     // Gradient
    fv = 0                  // Log likelihood

    // Loop over individuals
    // ---------------------

    for (i = 1; i <= rows(info); i++) {
        // Drop individuals with > 1 or 0 goods chosen
        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) continue

        // Compute the likelihood
        // ----------------------

        // Probability of the observed choice: with probability phi the
        // consumer pays attention and chooses from the full set via
        // conditional logit; with probability 1 - phi the consumer is
        // inattentive and picks the default good.
        sel    = selectindex(y_ij)
        d_ij   = panelsubmatrix(def, i, info)
        exb_ij = panelsubmatrix(exb, i, info)
        PY     = exb_ij :/ sum(exb_ij)
        PA     = pa_ij[i]
        PY_ij  = PA :* PY
        P_ij   = PY_ij + d_ij :* pi_ij[i]

        // Compute the likelihood
        fv = fv + log(P_ij[sel])

        // Compute the gradient and the Hessian
        // ------------------------------------

        if (todo >= 1) {
            index   = selectindex(d_ij)
            dLdP    = 1 / P_ij[sel]
            x_ij    = panelsubmatrix(x, i, info)
            xdphi_i = xdphi_ij[i, ]

            // Gradient
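            // With P = phi * PY[sel] + 1{sel = default} * (1 - phi):
            //   dP/db_k = phi * PY[sel] * (x[sel, k] - PY' * x[., k])
            //   dP/dg_k = phi'(index) * xz[d, k] * (PY[sel] - 1{sel = default})
            // and each score contribution is dP / P.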
            dLdb_k_aux = (x_ij[sel, ] :- PY' * x_ij)
            dLdb_k = dLdb_k_aux * PY_ij[sel] * dLdP
            dLdg_k = xdphi_i :* (PY[sel] - (sel == index)) * dLdP

            g = g :+ (dLdb_k, dLdg_k)

            if (todo == 2) {
                xz_ij = xz[d_sel[i], ]

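                // Each block below follows
                //   d2(log P) = (d2 P) / P - (dlog P)'(dlog P)
                // with the (d2 P) / P pieces assembled from the same
                // building blocks as the gradient.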
                // Hessian, beta
                PYx_ij     = PY' * x_ij
                dPdb_kl1_1 = dLdb_k_aux' * dLdb_k_aux
                dPdb_kl2_1 = PYx_ij' * PYx_ij
                dPdb_kl2_2 = x_ij' * diag(PY) * x_ij
                dPdb_kl    = (dPdb_kl1_1 - (dPdb_kl2_2 - dPdb_kl2_1)) * PY_ij[sel]

                // Hessian, cross
                dPdbg_kl   = PY[sel] * (dLdb_k_aux' * xdphi_i)

                // Hessian, gamma, delta
                dPdgd_aux  = (1 - 2 * PA) * (PY[sel] - (sel == index))
                dPdgd_kl   = dPdgd_aux * (xdphi_i' * xz_ij)

                // Hessian
                hb  = dLdP :* dPdb_kl  :- dLdb_k' * dLdb_k
                hg  = dLdP :* dPdgd_kl :- dLdg_k' * dLdg_k
                hbg = dLdP :* dPdbg_kl :- dLdb_k' * dLdg_k
                H = H :+ ((hb, hbg) \ (hbg', hg))
            }
        }
    }
    if (todo == 2) H = makesymmetric(H')
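    // Chain rule for the -exp- parametrization: the loop above differentiates
    // with respect to beta = ln(b), so
    //   dL/db_k         = (dL/dbeta_k) / b_k
    //   d2L/(db_j db_k) = H[j, k] / (b_j * b_k) - 1{j = k} * (dL/dbeta_k) / b_k^2
    // (g is rescaled first, so g :/ b below equals dL/dbeta :/ b^2).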
    if ( asexp ) {
        if (todo >= 1) g = g :/ b
        if (todo == 2) H = H :/ (b' * b) :- diag(g :/ b)
    }
}

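// Loglik for DSC model delegated to the C plugin (algorithms fast/faster).
// The current coefficient vector and todo flag are passed through the Stata
// objects __alogit_b and __alogit_todo; the plugin writes back __alogit_ll,
// __alogit_g, and the upper triangle of __alogit_H.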
void function _alogit_dsc_exact_fast(transmorphic M,
                                     real scalar todo,
                                     real rowvector b,
                                     fv, g, H)
{
    real scalar ktot
    ktot = st_numscalar("__alogit_ktot")
    H    = J(ktot, ktot,  0)
    g    = J(1, ktot,  0)
    st_numscalar("__alogit_ll", .)
    st_matrix("__alogit_b", b)
    st_numscalar("__alogit_todo", todo)
    if (moptimize_init_userinfo(M, 1) == "fast") {
        stata("plugin call aloglik_fast " + st_local("fastcall") + " dsc")
    }
    else {
        stata("plugin call aloglik_faster " + st_local("fastcall") + " dsc")
    }
    if (todo >= 1) g = st_matrix("__alogit_g")
    if (todo == 2) H = makesymmetric(uppertriangle(st_matrix("__alogit_H"))')
    fv = st_numscalar("__alogit_ll")
    if ( moptimize_init_userinfo(M, 8) ) {
        if (todo >= 1) g = g :/ b
        if (todo == 2) H = H :/ (b' * b) :- diag(g :/ b)
    }
}

///////////////////////////////////////////////////////////////////////
//                      DSC prediction helpers                       //
///////////////////////////////////////////////////////////////////////

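// Prediction helper for the DSC model. The -predict- argument selects the
// statistic returned (see the early exits and the return cascade below):
//   u        - utility index x * b
//   a        - attention index x * g + z * delta
//   pyc      - P(choice | attention), the conditional logit probability
//   pa / p0  - P(attention) / P(inattention), constant within group
//   mp0, mp1 - mean P(inattention) / P(attention) across groups
//   py       - unconditional choice probability
//   ll       - log likelihood at the coefficients used
//   dpyc, dpc, dpycdpc - derivative-type statistics used for margins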
real function alogit_dsc_predict(transmorphic M,
                                 real matrix y,
                                 real matrix x,
                                 real matrix xz,
                                 real matrix def,
                                 string scalar method,
                                 string scalar predict,
                                 | real rowvector b)
{

    // Function variables
    real matrix info
    real colvector xb, xzd, d_sel
    real scalar kx, ipos, apos, db, dg, dydx
    string scalar afit
    real rowvector b0

    // Function helpers
    real colvector exb, phi_ij, pa_ij, pi_ij

    // Loop variables to use for the likelihood
    real scalar i, PA, PI
    real colvector y_ij, exb_ij, d_ij
    real colvector dPYPC, dPYPC0, PY, PY_ij
    real rowvector dPY_ij

    // Variables to return
    real scalar mp0, mp1, loglik
    real colvector P_ij, PYC_ij, P0_i, dP_ij

    if (method == "importance") {
        errprintf("-method(importance)- is not available for -model(dsc)-\n")
        exit(198)
    }

    // Variables to use in the computation
    // -----------------------------------

    // Re-initialize info, group if user changed them
    moptimize_init_touse(M, st_local("touse"))
    moptimize_init_by(M, st_local("group"))

    info = panelsetup(*moptimize_util_by(M), 1)

    // Use the caller's b if supplied; otherwise take coefficients from M
    if (args() == 7) {
        afit = st_local("afit")
        b0   = moptimize_init_eq_coefs(M, 1), moptimize_init_eq_coefs(M, 2)
        b    = (afit == "loglik")? b0: moptimize_result_coefs(M)
    }
    kx  = cols(x)
    if ( moptimize_init_userinfo(M, 8) ) b = log(b)

    // Helper for derivatives/elasticities
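    // ipos and apos are passed as Stata locals and index the utility and
    // attention coefficient used to scale the index derivatives below; a
    // value of 0 means differentiate with respect to the index itself, so
    // db or dg is set to 1.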
    dydx = (predict == "dpycdpc") | (predict == "dpyc") | (predict == "dpc")
    if (dydx) {
        ipos = strtoreal(st_local("ipos"))
        apos = strtoreal(st_local("apos"))
        db   = (ipos == 0)? 1 : b[ipos]
        dg   = (apos == 0)? 1 : b[kx + apos]
    }

    // New fit!
    xb  = x  * b[|1 \ kx|]'
    if (predict == "u") return(xb)
    xzd = xz * b[|kx + 1 \ .|]'
    if (predict == "a") return(xzd)

    // For the likelihood, P(A), P(Y)
    d_sel = selectindex(def)
    pa_ij = invlogit(xzd[d_sel])
    _editvalue(pa_ij, 1, 1 - epsilon(1))
    _editvalue(pa_ij, 0, epsilon(1))
    pi_ij = 1 :- pa_ij
    exb   = exp(xb)

    // Loop through individuals
    loglik = mp0 = mp1 = 0
    P_ij   = PYC_ij = P0_i = dP_ij = phi_ij = J(rows(y), 1, .)
    for (i = 1; i <= rows(info); i++) {
        y_ij = panelsubmatrix(y, i, info)
        if(sum(y_ij) != 1) {
            // if(sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // else if(sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }

        exb_ij = panelsubmatrix(exb, i, info)
        PY     = exb_ij :/ sum(exb_ij)
        PYC_ij[|info[i, ]'|] = PY
        if (predict == "pyc") continue

        d_ij = panelsubmatrix(def, i, info)
        PI   = pi_ij[i]
        PA   = pa_ij[i]

        P0_i[|info[i, ]'|] = J(rows(y_ij), 1, PI)
        if (predict == "p0") continue

        phi_ij[|info[i, ]'|] = J(rows(y_ij), 1, PA)
        if (predict == "pa") continue

        mp0 = mp0 + PI
        if (predict == "mp0") continue

        mp1 = mp1 + PA
        if (predict == "mp1") continue

        _editmissing(PY, 0)
        PY_ij = PA :* PY + d_ij :* PI
        P_ij[|info[i, ]'|] = PY_ij
        if (predict == "py") continue

        loglik = loglik + log(PY_ij[selectindex(y_ij)])
        if (predict == "ll") continue

        ///////////////
        //  Margins  //
        ///////////////
        if (predict == "dpycdpc") {
            dPYPC0 = (PA * PI * dg) :* (PY :- 1)
            dPYPC  = (1 :- PY) :* db
        }
        else if (predict == "dpyc") {
            dPYPC0 = 0
            dPYPC  = (1 :- PY) :* db
        }
        else if (predict == "dpc") {
            dPYPC0 = (PA * PI * dg) :* (PY :- 1)
            dPYPC  = 0
        }
        else {
            dPYPC0 = 0
            dPYPC  = 1
        }
        ///////////////
        //  Margins  //
        ///////////////

        dPY_ij = (PA :* PY :* dPYPC) + d_ij :* dPYPC0
        dP_ij[|info[i, ]'|]  = dPY_ij
    }
    mp0 = mp0 / rows(info) // average P(inattention) across groups
    mp1 = mp1 / rows(info) // average P(attention) across groups

         if (predict == "py")  return(P_ij)
    else if (predict == "pyc") return(PYC_ij)
    else if (predict == "pa")  return(phi_ij)
    else if (predict == "ll")  return(loglik)
    else if (predict == "p0")  return(P0_i)
    else if (predict == "mp0") return(mp0)
    else if (predict == "mp1") return(mp1)
    else if (dydx)             return(dP_ij)
}

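// Counterfactual DSC choice probabilities from a user-supplied utility index
// u and attention probability phi: for each group in the estimation sample
// the routine recomputes P = phi * P(choice | attention) + 1{default} * (1 - phi).
// Groups whose maximum utility is not unique are skipped (left as missing).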
real colvector function alogit_dsc_counterfactual(real colvector u,
                                                  real colvector phi,
                                                  real colvector def,
                                                  real matrix info)
{
    real scalar i, PA, PI
    real colvector P_ij, select, PY
    real colvector exb, pa_ij, PY_ij
    real colvector u_ij, y_ij, exb_ij, d_ij

    select = selectindex(st_data(., st_local("touse")))
    exb    = exp(u)
    pa_ij  = phi[selectindex(def)]
    _editvalue(pa_ij, 1, 1 - epsilon(1))
    _editvalue(pa_ij, 0, epsilon(1))
    P_ij   = J(rows(u), 1, .)

    for (i = 1; i <= rows(info); i++) {
        if(!anyof(select, info[i, 1]) & !anyof(select, info[i, 2])) continue
        u_ij = panelsubmatrix(u, i, info)
        y_ij = max(u_ij) :== u_ij

        if(sum(y_ij) != 1) {
            // if(sum(y_ij) > 1) {
            //     errprintf("dropped group with multiple positive outcomes\n")
            // }
            // else if(sum(y_ij) == 0) {
            //     errprintf("dropped group with no positive outcomes\n")
            // }
            continue
        }
        d_ij   = panelsubmatrix(def, i, info)
        PI     = 1 - pa_ij[i]
        PA     = pa_ij[i]
        exb_ij = panelsubmatrix(exb, i, info)
        PY     = exb_ij :/ sum(exb_ij)
        _editmissing(PY, 0)
        PY_ij  = PA :* PY
        P_ij[|info[i, ]'|] = PY_ij + d_ij :* PI
    }
    return(P_ij)
}
end