program define choi_lr_test, rclass
version 14
* Calculate statistics for 2x2 tables contitioned on the marginal
* exposed rate. These include the
* Conditional maximum likelihood estimate of odds ratio (OR).
* Conditional likelihood ratio for the null hypothesis that OR = 1
* Choi's likelihood ratio chi-squared test for 2x2 tables
* The 1/6.8259358 likelihood support interval (LSI) for the OR
* The 1/k LSI for the OR using some other value of k
* See Choi et al. 2015 PLoS ONE. 10(4): e0121263.
* INPUT:
* case = 0/1 indicator variable for case-control status
* exposed = 0/1 indicator variable for exposure
* k = a value used to calculate the 1/k LSI for the OR.
* The 1/6.8259358 LSI is calculated by
* default, which equals the frequentist 95% confidence
* interval for normally distrubuted statistics.
* OUTPUT:
* or_cond = Maximum likelihood estimate (MLE) of the odds ratio (OR) for
* exposure for cases relative to controls conditioned
* on the total number of exposed subjects.
* clr = Conditional likelihood ratio (LR) for the null hypothesis that OR = 1
* chi2_clr = Choi's LR chi-squared statistic. This is equation 9 of Choi et al.
* p_choi = P value associated with the null hypothesis that OR = 1
* [lr6pt8lsi_lb, lr6pt8lsi_ub]
* = likelihood ratio support interval for OR with k = 6.8259358.
* [lrklsi_lb, lrklsi_ub]
* = the likelihood ratio support interval for the user's value of k
* SOME OTHER VARIABLES:
* psi = the log of an odds ratio estimate. Not necessarily the MLE
* psi_mle = the conditional MLE of psi
* y1 = number of exposed cases
* y2 = number of exposed controls
* n1 = number of cases
* n2 = number of controls
* yplus = y1 + y2 = the marginal (total) number exposed subjects
* max_lnf = probability mass function of y1|yplus at psi_mle
*
* NOTATION:
* This program calls Stata's cc program that gives output in terms of
* a case-control study. Choi et al. discusses two treatments with n1 and
* n2 patients who receive treatments 1 and 2 respectively. y1 and y2 are
* the number of successes observed on these two treatments. We use this
* notation in the calculations below, although output is given in terms
* of a conventional case-control study.
* CALLING SYNTAX
* choi_lr_test var_case var_exposed [if] [in] [weight] [,
* choi_lr_test_options]
* See Choi et al. 2015 PLoS ONE. 10(4): e0121263.
syntax varlist(min=2 max=2 numeric default=none) [if] [in] [fw] [, K(numlist max=1) /*
*/ Woolf TB COrnfield Exact Level(cilevel) * ]
tokenize `varlist'
local case `1'
local exposed `2'
marksample use
qui count if `use'
if (r(N)==0) {
error 2000
}
local levopt="level(`level')"
preserve
qui keep if `use'
tempvar WGT
if `"`weight'"'!="" {
qui gen double `WGT' `exp' if `use'
local w "[fweight=`WGT']"
}
else {
qui gen double `WGT'=1
local w "[fweight=`WGT']"
}
* Add one record for each combination of case-exposure variables
* in order to handle cells with no patients.
local N = _N
local Np4 = _N+4
quietly set obs `Np4'
quietly replace `case' = 0 if _n==`N' +1
quietly replace `exposed' = 0 if _n==`N' +1
quietly replace `WGT' = 1 if _n==`N' +1
quietly replace `case' = 0 if _n==`N' +2
quietly replace `exposed' = 1 if _n==`N' +2
quietly replace `WGT' = 1 if _n==`N' +2
quietly replace `case' = 1 if _n==`N' +3
quietly replace `exposed' = 0 if _n==`N' +3
quietly replace `WGT' = 1 if _n==`N' +3
quietly replace `case' = 1 if _n==`N' +4
quietly replace `exposed' = 1 if _n==`N' +4
quietly replace `WGT' = 1 if _n==`N' +4
collapse (sum) `WGT' , by(`exposed' `case')
sort `exposed' `case'
* Delete the 4 bogus patients added above
quietly replace `WGT' = `WGT' - 1
* Detect bad input data
local abort = 0
if _N != 4 {
display as error "Program expects data for a 2x2 table. `case' "
display as error "and `control' must be 0/1 indicator variables"
display as error "N=" _N
local abort = 1
}
else if _N == 4 {
if abs(`exposed'[1]-`exposed'[2]) +abs(`exposed'[3] - `exposed'[4]) != 0 {
display as error "Bad values for `exposed'"
local abort = 1
}
if abs(`case'[1]-`case'[3]) +abs(`case'[2] - `case'[4]) != 0 {
display as error "Bad values for `case'"
local abort = 1
}
* Identify tables with too many zeros
if `WGT'[4]*`WGT'[1]==0 & `WGT'[2]*`WGT'[3]==0 {
display as error "Too many empty cells to estimate OR"
local abort = 1
}
}
if `abort' != 1 { // Input looks OK. Run program. Abort program if `abort' == 1
* Confirm that needed scalars do not exist in the calling program. If they
* do, save them with another name so that they can be restored when the
* program ends.
foreach v in n1 n2 y1 y2 dummy psi p_choi chi2_clr clr max_lnf psi_mle k {
capture confirm scalar `v'
if _rc == 0 {
scalar ______TMP`v' = `v'
}
}
scalar y1 = `WGT'[4]
scalar y2 = `WGT'[3]
scalar n2 = `WGT'[1] + `WGT'[3]
scalar n1 = `WGT'[2] + `WGT'[4]
*
* Do conventional case-control analysis
*
cc `case' `exposed' `w' , `woolf' `tb' `cornfield' `exact' `levopt' `options'
local log_or_unconditional = log(r(or))
return scalar p = r(p) //two-sided p-value from the 2x2 chi^2 statistic without continuity correction
return scalar p1_exact = r(p1_exact) //chi-squared or one-sided exact significance
return scalar p_exact = r(p_exact) //two-sided exact significance
return scalar or = r(or) //Unconditioned MLE of the odds ratio
return scalar lb_or = r(lb_or) //lower bound of CI for or
return scalar ub_or = r(ub_or) //upper bound of CI for or
return scalar afe = r(afe) //attributable (prev.) fraction among exposed
return scalar lb_afe = r(lb_afe) //lower bound of CI for afe
return scalar ub_afe = r(ub_afe) //upper bound of CI for afe
return scalar afp = r(afp) //attributable fraction for the population
return scalar chi2 = r(chi2) //2x2 chi^2 statistic without continuity correction
display " "
* Identify if estimated OR = 0, infinity, of some positive number
if `WGT'[4]*`WGT'[1] > 0 & `WGT'[2]*`WGT'[3] > 0 { // all cells are positive
clear
quietly set obs 1
gen y1 = y1
gen y2 = y2
* Calculate the conditional MLE of psi = log(or_cond)
* See http://www.stata.com/features/overview/maximum-likelihood-estimation/
* for an introduction to Stata's ml program. The example given in this URL
* was modified to maximize the likelihood function given in equation 7 of
* Choi et al. 2015.
quietly ml model lf choi_lr_hypergeom (y1=)
quietly ml init `log_or_unconditional', copy
quietly ml maximize
* The preceding command returns _b[_cons] which is the MLE of psi,
* and e(ll) which is the maximum value of the log likelihood function
scalar psi_mle = _b[_cons]
scalar max_lnf = e(ll)
* Calculate the probability of the observed results under the null hypothesis
* that psi = 0.
choi_lr_hyperg_prob n1 n2 y1 y2 0
* r(lnf) equals the log of the probability mass function when psi = 0
scalar clr = exp(max_lnf - r(lnf))
scalar chi2_clr = -2*(r(lnf)-max_lnf)
scalar p_choi = chi2tail(1, chi2_clr)
display as result " "
display as result "Likelihood ratio statistics conditioned on the marginal exposure rate"
display as result " "
display as result "Maximum likelihood estimate (MLE) of odds ratio = "exp(psi_mle)
display as result "Likelihood ratio under the null hypothesis = "clr
display as result "Likelihood ratio chi-squared statistic = "chi2_clr
display as result "P value for the null hypothesis that the OR equals 1 = "p_choi
return scalar or_cond = exp(_b[_cons])
return scalar clr = clr
return scalar chi2_clr = chi2_clr
return scalar p_choi = p_choi
global k_global = 6.8259358
* Calculate the 1/6.8259358 LSI for the OR
choi_lr_support_interval
return scalar lr6pt8lsi_lb =LRsupport_lb
return scalar lr6pt8lsi_ub =LRsupport_ub
* Calculate the 1/k LSI for the OR
if "`k'"~="" {
global k_global = `k'
choi_lr_support_interval
return scalar lrklsi_lb =LRsupport_lb
return scalar lrklsi_ub =LRsupport_ub
}
}
else if `WGT'[4]*`WGT'[1] > 0 & `WGT'[2]*`WGT'[3] == 0 { // Estimated OR = infinity
* Equation (7) of Choi et al 2015 approaches 1 as psi approaches infinity
* A proof of this is given in Equn7whenAcellEquals0.pdf, which is stored in
* my dropbox folder \OFFICE_desktop\ChoiFisherTest\Stata Program\ado\withZeros
scalar max_lnf = 0
choi_lr_hyperg_prob n1 n2 y1 y2 0
scalar clr = exp(max_lnf - r(lnf))
scalar chi2_clr = -2*(r(lnf)-max_lnf)
scalar p_choi = chi2tail(1, chi2_clr)
display as result " "
display as result "Likelihood ratio statistics conditioned on the marginal exposure rate"
display as result " "
display as result "Maximum likelihood estimate (MLE) of odds ratio = infinity"
display as result "Likelihood ratio under the null hypothesis = "clr
display as result "Likelihood ratio chi-squared statistic = "chi2_clr
display as result "P value for the null hypothesis that the OR equals 1 = "p_choi
return scalar or_cond = .
return scalar clr = clr
return scalar chi2_clr = chi2_clr
return scalar p_choi = p_choi
global k_global = 6.8259358
* Calculate the 1/6.8259358 LSI for the OR
choi_lr_support_interval_big_or
return scalar lr6pt8lsi_lb =LRsupport_lb
return scalar lr6pt8lsi_ub =.
* To comfirm this lower bound execute the next three lines
* local debug_psi = log(LRsupport_lb) //debug
* choi_lr_hyperg_prob n1 n2 y1 y2 `debug_psi' //debug
* di "LR at lb = " 1/exp(r(lnf)) //debug
* Calculate the 1/k LSI for the OR
if "`k'"~="" {
global k_global = `k'
choi_lr_support_interval_big_or
return scalar lrklsi_lb =LRsupport_lb
return scalar lrklsi_ub =.
* To comfirm this lower bound execute the next three lines
* local debug_psi = log(LRsupport_lb) //debug
* choi_lr_hyperg_prob n1 n2 y1 y2 `debug_psi' //debug
* di "LR at lb = " 1/exp(r(lnf)) //debug
}
}
else if `WGT'[4]*`WGT'[1] == 0 & `WGT'[2]*`WGT'[3] > 0 { // Estimated OR = 0
* Equation (7) of Choi et al 2015 approaches 1 as psi approaches minus infinity
* A proof of this is given in Equn7whenAcellEquals0.pdf, which is stored in
* my dropbox folder \OFFICE_desktop\ChoiFisherTest\Stata Program\ado\withZeros
scalar max_lnf = 0
choi_lr_hyperg_prob n1 n2 y1 y2 0
scalar clr = exp(max_lnf - r(lnf))
scalar chi2_clr = -2*(r(lnf)-max_lnf)
scalar p_choi = chi2tail(1, chi2_clr)
display as result " "
display as result "Likelihood ratio statistics conditioned on the marginal exposure rate"
display as result " "
display as result "Maximum likelihood estimate (MLE) of odds ratio = 0"
display as result "Likelihood ratio under the null hypothesis = "clr
display as result "Likelihood ratio chi-squared statistic = "chi2_clr
display as result "P value for the null hypothesis that the OR equals 1 = "p_choi
return scalar or_cond = 0
return scalar clr = clr
return scalar chi2_clr = chi2_clr
return scalar p_choi = p_choi
global k_global = 6.8259358
* Calculate the 1/6.8259358 LSI for the OR
choi_lr_support_interval_or_eq_0
return scalar lr6pt8lsi_lb =0
return scalar lr6pt8lsi_ub =LRsupport_ub
* To comfirm this upper bound execute the next three lines
* local debug_psi = log(LRsupport_ub) //debug
* choi_lr_hyperg_prob n1 n2 y1 y2 `debug_psi' //debug
* di "LR at ub = " 1/exp(r(lnf)) //debug
* Calculate the 1/k LSI for the OR
if "`k'"~="" {
global k_global = `k'
choi_lr_support_interval_or_eq_0
return scalar lrklsi_lb =0
return scalar lrklsi_ub =LRsupport_ub
* To comfirm this upper bound execute the next three lines
* local debug_psi = log(LRsupport_ub) //debug
* choi_lr_hyperg_prob n1 n2 y1 y2 `debug_psi' //debug
* di "LR at ub = " 1/exp(r(lnf)) //debug
}
}
}
else display as error "Bad data input"
* Restore any scalar values from the calling program that have been changed
foreach v in n1 n2 y1 y2 dummy psi p_choi chi2_clr clr max_lnf psi_mle k {
capture confirm scalar ______TMP`v'
if _rc == 0 {
scalar `v' = ______TMP`v'
scalar drop ______TMP`v'
}
}
end // ********************************************************