// NOTES ON ORGANIZATION OF CODE // - The main program _tesen_cpi consists of 8 steps listed below // - Section A are Stata programs used by _tesen_cpi // - Section B are mata functions used by these programs // STEPS IMPLEMENTING _tesen_cpi // 1. Define temporary variables // 2. Parse input // 3. Calculate propensity score(s) // 4. Steps required for particular statistics // 4.1 (Averaged stats: calculate interpolation nodes) // 4.2 (CQTE: approximate quantile function limits) // 4.3 (ATET: unconditional expectations) // 5. Form matrix of c-dependence values // 6. Calculate bounds on TE statistic for each c // 6.1 Monotonize solution // 7. Calculate breakdown point // 8. Return results // PRIVATE STATA PROGRAMS USED BY _tesen_cpi // A.1 Stata programs that calculate bounds on a treatment effect statistics // for one value of c-dependence (one program for each statistic) // A.2 Stata programs that calculate the breakdown point (a general bisection // algorithm, and a wrapper function handling interface with bisection // algorithm) // A.3 Auxiliary code for calculation of QTE. // A.4 Miscellaneous Utilities. // PRIVATE MATA FUNCTIONS USED BY _tesen_cpi // B.1 Functions to calculate treatment effects statistics // B.2 PCHIP approximations and integrals // B.3 Miscellaneous Utilities // PROGRAM: tesensitivity conditional partial independence // SYNTAX/DOCUMENTATION: See Stata help file program define _tesen_cpi, eclass // ========================================================================= // 1. Define temporary variables // ========================================================================= tempvar xw0 xw1 pindex p0 p1 pmax p11 p10 xnodes ynodes coef tau_n_0 tau_n_1 tempvar exp_y0 exp_y1 up0 up1 c_table c_search_table lb ub b V tempvar wsupp call cref tempvar bscoef0 bscoef1 // ========================================================================= // 2. Parse Input // ========================================================================= syntax anything [if] [in], /// [cqte cate ate atet qte /// COVariates(string) /// QCOVariates(string) /// MEDian /// Quantile(real 0.5) /// Cgrid(int 40) /// CREFerence /// BREAKdown(real 0) /// NOBREAKdown /// nodes(int 100) /// tol(real 0.001) /// verbose /// debug] if "`debug'" != "" { di "command line input is: `0'" } // parse model specification _parse expand eqn op: anything gettoken ovar omvarlist : eqn_1 gettoken tvar tmvarlist : eqn_2 // check if covariates are the same for both models local covariates_equal : list omvarlist == tmvarlist if ~`covariates_equal'{ local tmvarlist : list omvarlist|tmvarlist di as error "{p}currently, covariates in the outcome and treatment models " /// "must be the same, will use union of both varlists.{p_end}" /// "{p}covariates used are: `tmvarlist' {p_end}" } // parse variables local Y `ovar' local X `tvar' fvrevar `tmvarlist' // this expands factor variables using temporary variables local W `r(varlist)' // mark sample marksample touse markout `touse' `Y' `X' `W' // calculate dimensions qui count if `touse' local nobs = r(N) local K: word count `W' local nvars = `K' + 2 // treatment + covariates + intercept // process verbose + debug option local verbose = "`verbose'" != "" local debug = "`debug'" != "" // process nobreakdown option if "`nobreakdown'" == "nobreakdown"{ local breakdown . } // check if treatment variable is binary _checkbinary `X', expect_supp(0, 1) fatal // TODO: throws error if support of treatment isn't 0/1 - include other // defaults w/ warnings? // check if outcome variable is binary _checkbinary `Y' local binary_outcome `s(`Y'_binary)' if `binary_outcome' { _checkbinary `Y', expect_supp(0, 1) fatal // TODO: throws error if support of outcome isn't 0/1 - include other // defaults w/ warnings? } // check for multiple statistics and store statistic type local stat `cqte'`cate'`ate'`atet'`qte' if strlen("`stat'") > 4 { di as error "only one statistic can be calculated in a single call to " /// "tesensitivity cpi" qui error 109 } local cond_stat = ("`cate'" != "") | ("`cqte'" != "") local qtl_stat = ("`qte'" != "") | ("`cqte'" != "") // store the name of the c_function to use (this is the function // that calculates bounds for a single c-dependence value) local c_function `stat'_bounds_c if `binary_outcome'{ local c_function binary_`c_function' } // check if binary outcome, check only ate specified if `binary_outcome' & "`stat'" != "ate" { di as error "for binary outcome, only ate is currently supported" error 109 } // form covariate matrix for conditional statistics // TODO: move this code to auxiliary program? if `cond_stat' { local ncovariates : word count `covariates' local nqcovariates : word count `qcovariates' // default to median if selected or mean otherwise if "`median'" != "" | "`covariates'" == "median" | "`qcovariates'" == "median" { foreach v of varlist `W' { _pctile `v' if `touse', p(50) matrix `wsupp' = (nullmat(`wsupp'), r(r1)) } matrix colnames `wsupp' = `W' } else { quietly mean `W' if `touse' matrix `wsupp' = e(b) matrix colnames `wsupp' = `W' } // overwrite covariate matrix with custom direct values if specified if `ncovariates' == 1 & "`covariates'" != "mean" & "`covariates'" != "median" { local ncov : colsof `covariates' local nrow : rowsof `covariates' // transpose if passed a column vector rather than row vector if `ncov' < `nrow' { matrix `covariates' = `covariates'' local ncov : colsof `covariates' } local covnames : colnames `covariates' local covname1 : word 1 of `covnames' if "`covname1'" == "c1" { matrix `wsupp' = `covariates' matrix colnames `wsupp' = `W' } else { forvalues i = 1/`ncov' { local v : word `i' of `covnames' tempname newentry matrix `newentry' = `covariates'[1, "`v'"] matrix `wsupp'[1,colnumb(`wsupp', "`v'")] = `newentry' } } } // convert list to matrix if supplied else if `ncovariates' > 1 { forvalues i = 1/`ncovariates'{ local newval : word `i' of `covariates' matrix `wsupp'[1,`i'] = `newval' } matrix colnames `wsupp' = `W' } // overwrite covariate matrix with custom quantile values if specified if `nqcovariates' == 1 & "`qcovariates'" != "median" { local nqcov : colsof `qcovariates' local nqrow : rowsof `qcovariates' // transpose if passed a column vector rather than row vector if `nqcov' < `nqrow' { matrix `qcovariates' = `qcovariates'' local nqcov : colsof `qcovariates' } // check names of matrix local covnames : colnames `qcovariates' local covname1 : word 1 of `covnames' // if unlabeled, must be all covaraites in order - add names if "`covname1'" == "c1" { matrix colnames `qcovariates' = `W' local covnames `W' } // calculate quantiles and replace covariate values for each forvalues i = 1/`nqcov' { local v : word `i' of `covnames' local pct = `qcovariates'[1, `i'] * 100 _pctile `v' if `touse', p(`pct') matrix `wsupp'[1,colnumb(`wsupp', "`v'")] = r(r1) } } // convert list to matrix if supplied and replace covariate values (must be all covariates) else if `nqcovariates' > 1 { forvalues i = 1/`nqcovariates'{ local v : word `i' of `W' local newval : word `i' of `qcovariates' local pct = `newval' * 100 _pctile `v' if `touse', p(`pct') matrix `wsupp'[1,`i'] = r(r1) } } // add a constant to covariate support matrix `wsupp' = (`wsupp', J(rowsof(`wsupp'), 1, 1)) matrix colnames `wsupp' = `W' _const if `debug'{ tempname wsuppt matrix `wsuppt' = `wsupp'' noi di "covariate support to be used is:" _continue matrix list `wsuppt' } } // for non-conditional statistics, just save the covariates to a matrix else { mkmat `W' if `touse', matrix(`wsupp') matrix `wsupp' = (`wsupp', J(`nobs', 1, 1)) matrix colnames `wsupp' = `W' } // ========================================================================= // 3. Calculate propensity score(s) // ========================================================================= qui logit `X' `W' if `touse', nolog matrix `pindex' = `wsupp' * e(b)' mata: p1 = invlogit(st_matrix("`pindex'")) // mata invlogit is vectorized mata: p0 = 1 :- p1 mata: st_matrix("`p1'", p1) mata: st_matrix("`p0'", p0) mata: st_numscalar("`pmax'", max((p1[,1] \ p0[,1]))) mata: mata drop p0 p1 // TODO: For averages, it would be better to get max for each obs if `cond_stat' & `debug'{ di as text "propensity scores are p0: " as result `p0' as text " p1: " as result `p1' } // ========================================================================= // 4. Steps required for particular statistics // ========================================================================= // These are steps that are calculated once and used as an input to the // functions that calculate the stat bounds for each c-dependence value // ========================================================================= // 4.1 (Averaged stats: calculate interpolation nodes) // ========================================================================= // TODO: this should be moved into mata if possible to avoid just leaving // pchipcoefs in mata global memory if !`qtl_stat' & !`binary_outcome'{ if `verbose' nois _dots 0, title(calculating interpolation nodes) reps(`nodes') // TODO: move this to mata forvalues j = 1(1)`nodes' { tempname coef local x_`j' = -cos((2*`j'-1)*(_pi)/(2*`nodes')) local x_`j' = (`x_`j'' + 1) * (1/2) capture qui qreg `Y' `X' `W' if `touse', quantile(`x_`j'') matrix `xnodes' = (nullmat(`xnodes') \ `x_`j'') matrix `coef' = e(b) matrix `ynodes' = (nullmat(`ynodes') \ `coef') if `verbose' nois _dots `j' 0 } // Debugging: show quantile function interpolated if `debug' { svmat `xnodes', n(xnodes) svmat `ynodes', n(matcol) foreach v of varlist `W'{ twoway scatter `ynodes'`v' xnodes, msize("vsmall") name("`v'") qui drop `ynodes'`v' } qui drop xnodes } // Form the pointer array of pchip coeficients mata: pchipcoefs = pchip_all_coef(st_matrix("`xnodes'"), /// st_matrix("`ynodes'"), `nvars') } // ========================================================================= // 4.2 (Quantile stats: approximate quantile function limits) // ========================================================================= // TODO: better way to do this? if "`stat'" == "cqte" { local alpha_accel = 0.5 local iterate 30 // approximate quantile coefficients for tau -> 1 local tau = 0.01 capture qreg `Y' `X' `W' if `touse', quantile(`=1-`tau'') matrix `bscoef1' = e(b) if `verbose' nois _dots 0, title(calculating approximate upper limit quantile function) reps(`iterate') forvalues k=1(1)`iterate' { if `verbose' nois _dots `k' 0 local tau = `alpha_accel'* `tau' capture qreg `Y' `X' `W' if `touse', quantile(`=1-`tau'') mata: mat_eq("`bscoef1'", "e(b)", "check") matrix `bscoef1' = e(b) if `check' == 1 { continue, break } } if `verbose' nois di // approximate quantile coefficients for tau -> 0 local tau = 0.01 capture qreg `Y' `X' `W' if `touse', quantile(`tau') matrix `bscoef0' = e(b) if `verbose' nois _dots 0, title(calculating approximate lower limit quantile function) reps(`iterate') forvalues k=1(1)`iterate' { if `verbose' nois _dots `k' 0 local tau = `alpha_accel' * `tau' capture qreg `Y' `X' `W' if `touse', quantile(`tau') mata: mat_eq("`bscoef0'", "e(b)", "check") matrix `bscoef0' = e(b) if `check' == 1 { continue, break } } if `verbose' nois di } // ========================================================================= // 4.3 (ATET: unconditional expectations) // ========================================================================= if "`stat'" == "atet"{ qui reg `Y' `X' if `touse' matrix `exp_y1' = (1,1) * (e(b))' scalar `exp_y1' = `exp_y1'[1,1] matrix `exp_y0' = (0,1) * (e(b))' scalar `exp_y0' = `exp_y0'[1,1] qui summarize `X' if `touse' scalar `up1' = 1/r(N) * r(sum) scalar `up0' = 1/r(N) * (r(N) - r(sum)) } // ========================================================================= // 4.4 (Binary ATE: outcome probabilities) // ========================================================================= if "`stat'" == "`ate'" & `binary_outcome'{ qui logit `Y' `X' `W' if `touse', nolog // Run a logit of Y on (X, W) local nobs : rowsof(`wsupp') // calculate: \hat{P}(Y=1 | X=1, W=w) = \Lambda( \hat{\beta}' [1 W] ) mata: p11 = invlogit((J(`nobs',1,1), st_matrix("`wsupp'")) * st_matrix("e(b)")') // calculate: \hat{P}(Y=1 | X=0, W=w) = \Lambda( \hat{\beta}' [0 W] ) mata: p10 = invlogit((J(`nobs',1,0), st_matrix("`wsupp'")) * st_matrix("e(b)")') mata: st_matrix("`p10'", p10) mata: st_matrix("`p11'", p11) mata: mata drop p10 p11 } // ========================================================================= // 5. Form matrix of c-dependence values // ========================================================================= // uniform grid over range of c mata : cgrid = rangen(0, 1, strtoreal(st_local("cgrid"))) mata : st_matrix("`call'", cgrid) mata : mata drop cgrid forvalues i = 1/`cgrid' { local gridnames `"`gridnames' grid`i'"' } matrix rownames `call' = `gridnames' matrix roweq `call' = "grid" // add reference c-dependence values if requested if `"`creference'"' != ""{ _tesen_cscale `X' `W' if `touse', cmax fullmodel matrix `cref' = r(cmax) mata : sort_st_matrix("`cref'", 1) matrix roweq `cref' = "cref" matrix `call' = (`call' \ `cref') } // add the max c (used in calculations) matrix `call' = (`pmax' \ `call') // total number of observations local cnum = rowsof(`call') // ========================================================================= // 6. Calculate c-dependence bounds on statistics // ========================================================================= // form arguments for statistic selected (documented below in section A) if "`stat'" == "cqte" { local c_function_args `"`quantile' `p0' `p1' `wsupp' "`bscoef0'" "`bscoef1'" "`Y' `X' `W'" `touse'"' } else if "`stat'" == "atet" { local c_function_args `"`p0' `p1' `wsupp' pchipcoefs `nvars' `nobs' `exp_y0' `exp_y1' `up0' `up1'"' } else if "`stat'" == "qte" { local c_function_args `"`quantile' `p0' `p1' `wsupp' `tol' `Y' `X' "`W'" `touse'"' } else if "`stat'" == "ate" & `binary_outcome' { local c_function_args `"`p0' `p1' `p10' `p11'"' } else { // ate (continous) + cate local c_function_args `"`p0' `p1' `wsupp' pchipcoefs `nvars' `nobs'"' } // initialize output matrices matrix `c_table' = J(`cnum', 2, .) // calculate bounds for each c-dependence value if `verbose' nois _dots 0, title(calculating `stat') reps(`cnum') forvalues i=1/`cnum'{ local c = `call'[`i', 1] if `c' <= `pmax' { `c_function' `c' `c_function_args' matrix `c_table'[`i', 1] = `s(lower)' matrix `c_table'[`i', 2] = `s(upper)' } else { matrix `c_table'[`i', 1] = `c_table'[1,1] matrix `c_table'[`i', 2] = `c_table'[1,2] } if `verbose' nois _dots `i' 0 } if `verbose' nois di // form full c_table and drop the max value matrix `c_table' = (`call', `c_table') matrix `c_table' = `c_table'[2...,1...] mata: sort_st_matrix("`c_table'", 1) // ========================================================================= // 6.1 Monotonize the output // ========================================================================= mata: apply_cumfunc("`c_table'", 2, &min()) mata: apply_cumfunc("`c_table'", 3, &max()) // ========================================================================= // 7. Calculate breakdown point // ========================================================================= if `breakdown' < . { if `verbose' nois di as text "calculating breakdown point..." _breakdown `c_table' `tol' `breakdown' `c_function' `"`c_function_args'"' local c_breakdown `s(output)' } else { local c_breakdown . } // ========================================================================= // 8. Return results // ========================================================================= // NOTES: // - for compatibility with Stata's ereturn functionality, results are // stored in e(b). The same information is stored in the matrix // e(c_table) which is organized intuitively (col 1: c values, // cols 2-3: lower, upper bounds) // - in order to use ereturn functionality, have to store a covariance // matrix e(V). Since we aren't dealing with inferent yet, this is just // the identify matrix. // save all results in e(b) following normal naming conventions matrix `lb' = `c_table'[1...,2]' matrix `ub' = `c_table'[1...,3]' forvalues i = 1/`=`cnum' - 1' { local lbnames `"`lbnames' lb:c`i'"' local ubnames `"`ubnames' ub:c`i'"' } matrix colnames `lb' = `lbnames' matrix colnames `ub' = `ubnames' matrix `b' = (`lb', `ub') local nb : colsof(`b') local allnames : colnames(`b') local alleq : coleq(`b') ereturn post `b', esample(`touse') depname(`Y') properties(b) // save results in c_table matrix (sorted) mata: sort_st_matrix("`c_table'", 1) // save the rest of the results if "`creference'" != "" { ereturn matrix cref `cref', copy } ereturn matrix covsupp = `wsupp', copy ereturn matrix c_table = `c_table', copy ereturn hidden local estat_cmd _tesen_estat ereturn local tmodel logistic if `binary_outcome'{ ereturn local omodel logistic } else { ereturn local omodel linear quantile } ereturn local tvar `X' ereturn local stat `stat' ereturn local cmdline = stritrim(`"`0'"') ereturn local subcmd cpi ereturn local cmd tesensitivity ereturn scalar N = `nobs' ereturn scalar y_breakdown = `breakdown' ereturn scalar c_breakdown = `c_breakdown' // ========================================================================= // 8.1 Cleanup // ========================================================================= if !`qtl_stat' & !`binary_outcome'{ mata: mata drop pchipcoefs } end // ============================================================================= // A.1 Bounds for a single c-dependence value for each statistic // ----------------------------------------------------------------------------- // Notes: // - these are primarily a stata interface to the mata functions in B.1 // ============================================================================= // PROGRAM: CQTE, single c-dependence value // DESCRIPTION: Calculate bounds on the cqte for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - qtl: (real scalar) quantile // - p0, p1: (real scalars) probability of treatment for person with given covariates // - bscoef0, bscoef1: (real scalars) approximate cqte values for c = 0,1 // - varlist: (varlist) variables to be used in quantile regressions (Y X W) // - touse: (varname) sample flag // TODO: Most of this should be moved to mata program define cqte_bounds_c, sclass args c qtl p0 p1 wsupp bscoef0 bscoef1 varlist touse tempname qtl_bounds qtl_coef xw // calculate the arguments to the quantile function given c, p1, p0 mata: qtls = cqtl_arg_c(`c', st_matrix("`p0'"), st_matrix("`p1'"), `qtl') mata: st_matrix("qtls", qtls) // calculate the upper and lower bound on the quantile function for // each potential outcome // There are 4 values: // 1. lower bound, x = 1 // 2. upper bound, x = 0 // 3. upper bound, x = 1 // 4. lower bound, x = 0 matrix `qtl_bounds' = J(4, 1, .) forvalues j = 1/4 { // select the right covariates for each set of coefficients // odd are treatment, even are control local nobs : rowsof(`wsupp') if mod(`j', 2) == 0{ matrix `xw' = (J(`nobs', 1, 0), `wsupp') } else { matrix `xw' = (J(`nobs', 1, 1), `wsupp') } // calculate the quantile regression if not on the boundary // use the approximate boundary coeficients if not local qtl = qtls[`j', 1] if (`qtl' > 0) & (`qtl' < 1) { capture qreg `varlist' if `touse', quantile(`qtl') matrix `qtl_coef' = e(b) } else if(`qtl' == 0) { matrix `qtl_coef' = `bscoef0' } else if(`qtl' == 1) { matrix `qtl_coef' = `bscoef1' } // calcaulte the quantile function value matrix `qtl_bounds'[`j', 1] = `xw' * `qtl_coef'' } // take differences to calculate CQTE mata: qtl_bounds = st_matrix("`qtl_bounds'") mata: qtl_bounds = rowshape(qtl_bounds, 2) mata: cqtl_bounds = qtl_bounds[,1] - qtl_bounds[,2] mata: st_local("lower", strofreal(cqtl_bounds[1,1])) mata: st_local("upper", strofreal(cqtl_bounds[2,1])) mata: mata drop cqtl_bounds qtl_bounds qtls sreturn local lower = `lower' sreturn local upper = `upper' end // PROGRAM: CATE bounds, single c-dependence value // DESCRIPTION: Calculate bounds on the cate for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - p0, p1: (real scalars) probability of treatment for person with given covariates // - wsupp: (stata matrix) matrix of covariates (1 x nvars) // - nvars: (scalar int) number of covariates + intercept + treatment // - nobs: (scalar int) number of observations program define cate_bounds_c, sclass args c p0 p1 wsupp pchipcoefs nvars nobs tempname bounds mata: p0 = st_matrix("`p0'") mata: p1 = st_matrix("`p1'") mata: bds = cate_integral_c(`c', p0, /// p1, st_matrix("`wsupp'")', /// `pchipcoefs', `nvars', 1, 0) mata: st_matrix("`bounds'", bds) mata: mata drop p0 p1 bds sreturn local lower = `bounds'[1,1] sreturn local upper = `bounds'[2,1] end // PROGRAM: ATE bounds, single c-dependence value, continuous outcome // DESCRIPTION: Calculate bounds on the ate (continuous) for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - p0, p1: (real col vectors) probability of treatment for each observation // - wsupp: (stata matrix) matrix of covariates (nobs x K) // - pchipcoefs: (mata pointer arrray) matrix of pointers to pchip coefficients // (i.e., name of an object in mata global memory) // - nvars: (scalar int) number of covariates + intercept + treatment // - nobs: (scalar int) number of observations program ate_bounds_c, sclass args c p0 p1 wsupp pchipcoefs nvars nobs tempname bounds mata: bds = J(2, `nobs', .) mata: p0 = st_matrix("`p0'") mata: p1 = st_matrix("`p1'") forvalues n=1/`nobs'{ mata: bds[,`n'] = cate_integral_c(`c', p0[`n', 1], p1[`n', 1], /// st_matrix("`wsupp'")[`n',]', /// `pchipcoefs', `nvars', 1, 0) } mata: st_matrix("`bounds'", mean(bds')) mata: mata drop p0 p1 bds sreturn local lower = `bounds'[1, 1] sreturn local upper = `bounds'[1, 2] end // PROGRAM: ATE bounds, single c-dependence value, binary outcome // DESCRIPTION: Calculate bounds on the ate (binary) for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - p0, p1: (real col vectors) probability of treatment for each observation // - p10, p11: (real col vectors) probablity outcome = 0,1 conditional on X = 1 // for each observation program binary_ate_bounds_c, sclass args c p0 p1 p10 p11 tempname bounds mata: bds = binary_ate_c(`c', st_matrix("`p0'"), st_matrix("`p1'"), st_matrix("`p10'"), st_matrix("`p11'")) mata: st_matrix("`bounds'", bds) mata: mata drop bds sreturn local lower = `bounds'[1, 1] sreturn local upper = `bounds'[1, 2] end // PROGRAM: ATET bounds, single c-dependence value // DESCRIPTION: Calculate bounds on the atet for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - p0, p1: (real col vectors) probability of treatment for each observation // - wsupp: (stata matrix) matrix of covariates (nobs x K) // - pchipcoefs: (mata pointer arrray) matrix of pointers to pchip coefficients // (i.e., name of an object in mata global memory) // - nvars: (scalar int) number of covariates + intercept + treatment // - nobs: (scalar int) number of observations // - exp_y0: (stata scalar real) unconditional expectation of Y | X = 0 // - exp_y1: (stata scalar real) unconditional expectation of Y | X = 1 // - up0: (stata scalar real) unconditional probability of X = 0 // - up1: (stata scalar real) unconditional probability of X = 1 // TODO: Additional calculations should be moved to mata program atet_bounds_c, sclass args c p0 p1 wsupp pchipcoefs nvars nobs exp_y0 exp_y1 up0 up1 tempname ub lb mata: bds = J(2, `nobs', .) mata: p0 = st_matrix("`p0'") mata: p1 = st_matrix("`p1'") forvalues n=1/`nobs'{ mata: bds[,`n'] = cate_integral_c(`c', p0[`n', 1], p1[`n', 1], /// st_matrix("`wsupp'")[`n',]', /// `pchipcoefs', `nvars', 0, 1) } mata: bds = mean(bds') mata: lower = st_numscalar("`exp_y1'") - /// ((bds[1,2] - st_numscalar("`up0'")*st_numscalar("`exp_y0'")) /// / st_numscalar("`up1'")) mata: upper = st_numscalar("`exp_y1'") - /// ((bds[1,1] - st_numscalar("`up0'")*st_numscalar("`exp_y0'")) /// / st_numscalar("`up1'")) mata: st_numscalar("`ub'", upper) mata: st_numscalar("`lb'", lower) mata: mata drop p0 p1 bds sreturn local lower = `lb' sreturn local upper = `ub' end // PROGRAM: QTE bounds, single c-dependence value // DESCRIPTION: Calculate bounds on the qte for a given c-dependence value // INPUT: // - c: (real scalar) c-dependence value // - qtl: (real scalar) quantile // - p0, p1: (real col vectors) probability of treatment for each observation // - wsupp: (stata matrix) matrix of covariates (nobs x K) // - tol: (scalar real) precision tolerance used in quantile calculations // - Y, X, W: (varlists) variables used for quantile regressions // - touse: (varname) sample flag program qte_bounds_c, sclass args c qtl p0 p1 wsupp tol Y X W touse // TODO: the qte auxiliary functions uses these directly from mata global // memory - clean this up so they are passed by the functions rather // than hanging around in mata global memory local nobs : rowsof(`wsupp') mata: xw0 = (J(`nobs', 1, 0), st_matrix("`wsupp'")) mata: xw1 = (J(`nobs', 1, 1), st_matrix("`wsupp'")) mata: p0 = st_matrix("`p0'") mata: p1 = st_matrix("`p1'") LQ `c' `Y' `X' "`W'" `touse' `tol' `qtl' 0 local LQ0 `s(LQ0)' UQ `c' `Y' `X' "`W'" `touse' `tol' `qtl' 1 local UQ1 `s(UQ1)' local UQTE = `UQ1' - `LQ0' LQ `c' `Y' `X' "`W'" `touse' `tol' `qtl' 1 local LQ1 `s(LQ1)' UQ `c' `Y' `X' "`W'" `touse' `tol' `qtl' 0 local UQ0 `s(UQ0)' local LQTE = `LQ1' - `UQ0' mata: mata drop xw1 xw0 p1 p0 // TODO: these are other mata variables left in global memory by the // auxiliary functions - should be cleaned up mata: mata drop Fy0w Fy1w LF0W LF1W UF0W UF1W index vecLF vecUF yhi ylow sreturn local upper = `UQTE' sreturn local lower = `LQTE' end // ============================================================================= // A.2 Programs for calculating the breakdown point // ============================================================================= // PROGRAM: breakdown point // DESCRIPTION: calculate the breakdown point for treatment effect statistic // INPUT: // - c_table: (stata matrix) col.1 = c-dep vals, cols 2,3 = lower, upper bounds // - tol: (scalar real) precision tolerance for breakdown point // - breakdown_y (scalar real) value for conclusion (stat > breakdown_y) // - function (function) function to calculate stat bounds for given c-dep. // - arguments (anything) list of arguments passed to the fucntion // IMPLEMENTATION NOTES: // - the grid of c-dependence is ordered, and the upper (lower) bounds are // increasing (decreasing) monotonically with c. To start the bisection // algorithm, find all the indicies where the lower (upper) bound is above // (below) the conclusion value. This is the lower bound for the breakdown // point, and the next c is the uppder bound. // - The clow, chi values passed to the bisection algorithm are the values of // c such that bound(clow) < breakdown_y < bound(chi), where "bound" is either // the lower or upper bound function, depending on whether the bound(0) is // above or below breakdown_y. If it is above (below), then we are looking for // where the lower (upper) bound crosses breakdown_y. // - If bound(0) < breakdown_y, then bound(clow) < bound(chi), otherwise the // reverse. // - the Mata function `breakdown_start_points` is written to handle this. It // saves the correct values to stata macros clow, chi. program define _breakdown, sclass args c_table tol breakdown_y function arguments // get the values of the c-grid above and below the breakdown point if `c_table'[1,2] > `breakdown_y' { local side "lower" } else { local side "upper" } mata: breakdown_start_points("`c_table'", `breakdown_y', "`side'") // check if breakdown point = 1 (i.e., no breakdown), otherwise run bisection algorithm if `clow' == 1 { sreturn local output = 1 } else { bisection `clow' `chi' `side' `tol' `breakdown_y' `function' `"`arguments'"' sreturn local output = `s(output)' } end // PROGRAM: bisection algorithm // INPUT // - clow (scalar real) value of c such that bound(clow) < y bound(chi) where // bound is the bound that could cross y. // - chi (scalar real) see above // - side (scalar string) "lower"/"upper": bound that could cross y // - tol: (scalar real) precision tolerance // - y (scalar real) value for conclusion (stat > y) // - function (function) function to calculate stat bounds for given c-dep. // - arguments (anything) list of arguments passed to the fucntion // IMPLEMENTATION NOTES: // - all functions must take as their first argument the value of c-dependence. // After that, the function can take an arbitrary number of arguments. // - also used in the calculate of qte program define bisection, sclass args clow chi side tol y function arguments local cmid = (`chi' + `clow')/2 `function' `cmid' `arguments' local fcmid `s(`side')' if abs(`fcmid' - `y') < `tol' | abs((`chi' - `clow')/2) < `tol' { sreturn local output = `cmid' } else { if `fcmid' < `y' { local clow = `cmid' } else { local chi = `cmid' } bisection `clow' `chi' `side' `tol' `y' `function' `"`arguments'"' } end // TODO: this is going to calculate both bounds and throw one away // is this a significant efficiency loss? // ============================================================================= // A.3 Auxiliary code for QTE calculation // ============================================================================= // TODO: This should be moved to mata and/or documented program define UF1, sclass // The upper bound on F_{Y_1}(y) args gridy gridc Y X W touse // Estimate F_{Y|X,W}(y | x,w) tempvar indicatorY qui gen `indicatorY' = (`Y' <= `gridy') if `touse' // indicator(Y<=y) qui logit `indicatorY' `X' `W' if `touse', nolog // logit indicator(Y<=y) on (1,X,W) mata: Fy1w = invlogit(xw1 * st_matrix("e(b)")') // coefficient*(X,W,1) mata: UF1W = min3(((p1 :* Fy1w) :/ (p1 :- `gridc')) :* (p1 :> `gridc') + (p1 :<= `gridc'), (p1 :* Fy1w :+ `gridc') :/ (p1 :+ `gridc'), p1 :* Fy1w :+ (1 :- p1)) mata: st_local("UF1", strofreal(mean(UF1W))) sreturn local bound = `UF1' end program define UF0, sclass // The upper bound on F_{Y_0}(y) args gridy gridc Y X W touse // Estimate F_{Y|X,W}(y | x,w) tempvar indicatorY qui gen `indicatorY' = (`Y' <= `gridy') if `touse' // indicator(Y<=y) qui logit `indicatorY' `X' `W' if `touse', nolog // logit indicator(Y<=y) on (1,X,W) mata: Fy0w = invlogit(xw0 * st_matrix("e(b)")') // coefficient*(X,W,1) mata: UF0W = min3(((p0 :* Fy0w) :/ (p0 :- `gridc')) :* (p0 :> `gridc') + (p0 :<= `gridc'), (p0 :* Fy0w :+ `gridc') :/ (p0 :+ `gridc'), p0 :* Fy0w :+ (1 :- p0)) mata: st_local("UF0", strofreal(mean(UF0W))) sreturn local bound = `UF0' end program define LF1, sclass // The lower bound on F_{Y_1}(y) args gridy gridc Y X W touse // Estimate F_{Y|X,W}(y | x,w) tempvar indicatorY qui gen `indicatorY' = (`Y' <= `gridy') if `touse' // indicator(Y<=y) qui logit `indicatorY' `X' `W' if `touse', nolog // logit indicator(Y<=y) on (1,X,W) mata: Fy1w = invlogit(xw1 * st_matrix("e(b)")') // coefficient*(X,W,1) mata: LF1W = max3((p1 :* Fy1w) :/ (p1 :+ `gridc'), ((p1 :* Fy1w :- `gridc') :/ (p1 :- `gridc')) :* (p1 :> `gridc'), p1 :* Fy1w) mata: st_local("LF1", strofreal(mean(LF1W))) sreturn local bound = `LF1' end program define LF0, sclass // The lower bound on F_{Y_0}(y) args gridy gridc Y X W touse // Estimate F_{Y|X,W}(y | x,w) tempvar indicatorY qui gen `indicatorY' = (`Y' <= `gridy') if `touse' // indicator(Y<=y) qui logit `indicatorY' `X' `W' if `touse', nolog // logit indicator(Y<=y) on (1,X,W) mata: Fy0w = invlogit(xw0 * st_matrix("e(b)")') // coefficient*(X,W,1) mata: LF0W = max3((p0 :* Fy0w) :/ (p0 :+ `gridc'), ((p0 :* Fy0w :- `gridc') :/ (p0 :- `gridc')) :* (p0 :> `gridc'), p0 :* Fy0w) mata: st_local("LF0", strofreal(mean(LF0W))) sreturn local bound = `LF0' end program define UQ, sclass // the upper bound on Q_{Y_x}(tau) args gridc Y X W touse tol tau treat // treat=1 or 0 tempname ymax ymin vecLF qui sum `Y' if `touse' scalar `ymax' = r(max) scalar `ymin' = r(min) forvalues i = 2(1)9 { local y`i' = (`ymax' - `ymin') * (`=`i'-1'/9) + `ymin' LF`treat' `y`i'' `gridc' `Y' `X' "`W'" `touse' // lower bound for F_{Y_x}(y) local LF`treat'`i' `s(bound)' } local y1 `ymin' local y10 `ymax' local LF`treat'1 0 // h(y_min)=0 local LF`treat'10 1 // h(y_max)=1 forvalues i = 1(1)10 { matrix `vecLF' = (nullmat(`vecLF'), `LF`treat'`i'') // a vector of bound functions on the grid of y's } mata: vecLF = (st_matrix("`vecLF'"))' mata: index = selectindex((vecLF:<`tau')) // marks all the index of vecLF which is < y mata: ylow = index[rows(index),.] //corresponds to the largest LF that is smaller than y mata: yhi = ylow + 1 mata: st_local("yhi", strofreal(yhi)) mata: st_local("ylow", strofreal(ylow)) local yhi = `y`yhi'' local ylow = `y`ylow'' bisection `ylow' `yhi' bound `tol' `tau' LF`treat' `"`gridc' `Y' `X' "`W'" `touse'"' sreturn local UQ`treat' = `s(output)' end program define LQ, sclass // the lower bound on Q_{Y_x}(tau) args gridc Y X W touse tol tau treat // treat=1 or 0 tempname ymax ymin vecUF qui sum `Y' if `touse' scalar `ymax' = r(max) scalar `ymin' = r(min) forvalues i = 2(1)9 { local y`i' = (`ymax' - `ymin') * (`=`i'-1'/9) + `ymin' UF`treat' `y`i'' `gridc' `Y' `X' "`W'" `touse' // lower bound for F_{Y_x}(y) local UF`treat'`i' `s(bound)' } local y1 `ymin' local y10 `ymax' local UF`treat'1 0 // h(y_min)=0 local UF`treat'10 1 // h(y_max)=1 forvalues i = 1(1)10 { matrix `vecUF' = (nullmat(`vecUF'), `UF`treat'`i'') // a vector of bound functions on the grid of y's } mata: vecUF = (st_matrix("`vecUF'"))' mata: index = selectindex((vecUF:<`tau')) // marks all the index of vecUF which is < y mata: ylow = index[rows(index),.] //corresponds to the largest UF that is smaller than y mata: yhi = ylow + 1 mata: st_local("yhi", strofreal(yhi)) mata: st_local("ylow", strofreal(ylow)) local yhi = `y`yhi'' local ylow = `y`ylow'' bisection `ylow' `yhi' bound `tol' `tau' UF`treat' `"`gridc' `Y' `X' "`W'" `touse'"' sreturn local LQ`treat' = `s(output)' end // ============================================================================= // A.4 Miscellaneous utilities // ============================================================================= // PROGRAM: Check Binary // DESCRIPTION: Checks if variables have binary support // INPUT: // - varlist (varlist) variables to check // - fatal (int 0/1) throw error if doesn't match `expect` // - expect (int 0/1) expect to be binary or note // - expect_supp (numlist) expected support values // RETURN: if not fatal, return [varname]_binary for each variable (1 for binary) program _checkbinary, sclass syntax varlist, [fatal expect(integer 1) expect_supp(numlist)] foreach v of varlist `varlist'{ qui levelsof `v', local(lvls) local nsupp: word count `lvls' local binary 1 if `nsupp' > 2 { local binary 0 } if "`fatal'" != "" & (`expect' != `binary') { di as error "expected `v' to have binary support, but it had `nsupp' support points" qui error 109 } sreturn local `v'_binary = `binary' if "`expect_supp'" != "" & "`fatal'" != "" { local levels_match : list expect_supp == lvls if !`levels_match' { di as error "expected `v' to have support `expect_supp' but had support `lvls'" qui error 109 } } } end // ============================================================================= // B. Mata Functions // ============================================================================= mata: // ========================================================================= // B.1 Calculation of Treatment Effect statistics // ========================================================================= // FUNCTION: Binary ATE Bounds // DESCRIPTION: Calculate bounds on the ATE for a given c-dependence value // with a binary outcome variable. // INPUT: // - c: c-dependence value // - p1, p0: N X 1 vector of propensity scores // - p0: 1 - p1 // - p11: N X 1 vector of P(Y = 1|X = 1, W = w_i) // - p10: N X 1 vector of P(Y = 1|X = 0, W = w_i) // RETURN: matrix (1 x 2): lower, upper bounds on ATE numeric matrix binary_ate_c(c, p0, p1, p10, p11) { UP1 = rowmin(( ((p11 :* p1) :/ (p1 :- c)) :* (p1 :> c) + (p1 :<= c), ((p11 :* p1 :+ c)) :/ (p1 :+ c), (p11 :* p1) :+ (1 :- p1) )) LP1 = rowmax(( (p11 :* p1) :/ (p1 :+ c), (((p11 :* p1) :- c) :/ (p1 :- c)) :* (p1 :> c), p11 :* p1 )) UP0 = rowmin(( ((p10 :* p0) :/ (p0 :- c)) :* (p0 :> c) :+ (p0 :<= c), ((p10 :* p0 :+ c)) :/ (p0 :+ c), (p10 :* p0) :+ (1 :- p0) )) LP0 = rowmax(( (p10 :* p0) :/ (p0 :+ c), (((p10 :* p0) :- c) :/ (p0 :- c)) :* (p0 :> c), p10 :* p0 )) return(mean((LP1 - UP0, UP1 - LP0)), 1) } // FUNCTION: Conditional Quantile Argument // DESCRIPTION: Calculate the arguments to the quantile function which are // used to calculate upper and lower bounds on the quantile for a // given a value of c-dependence. // INPUT: // - c: scalar, c-dependence // - p0, p1: scalar: propensity scores (and 1 - prop score) // - qtl: scalar: quantile // RETURN: matrix (2 x 2): coefficients for argument to the quantile function. // call the returned matrix m: // - lower bound arg = m[1,1] + m[1,2] * quantile, // - upper bound arg = m[2,1] + m[2,2] * quantile numeric colvector cqtl_arg_c(c, p0, p1, qtl) { limits = qtl_bound(c, p0, p1) coef = qtl_coef(c, p0, p1) qtl_select = (limits[, 1] :<= J(8, 1, qtl)) :& (J(8, 1, qtl) :< limits[, 2]) coef = select(coef, qtl_select) return(coef * (1 \ qtl)) } // FUNCTION: CATE integral, single c dependence // DESCRIPTION: Calculate bounds on the conditional average treatment effect for // for a given value of c-dependence. // INPUT: // - c: scalar, c-depenendence value // - p0, p1: scalars: propensity scores (and 1 - prop score) // - cov: colvector (K x 1): values of covariates // - pchipcoefs: pointer matrix (1 x K): pointer to pchip coeficients for each // covariate (for interpolated quantile regression coefficient function) // - nvars: scalar: number of covariates (including constant, but not treatment) // - cate, catt: logical: indicators to return cate and/or catt // RETURN: colvector (2 x 1): lower, upper bound on CATE / CATT numeric matrix cate_integral_c(c, p0, p1, cov, pchipcoefs, nvars, cate, catt) { int_segs = J(8, 1, .) qtl_coef= J(4, nvars,.) // calculate the coeficients and limits limits = qtl_bound(c, p0, p1) coef = qtl_coef(c, p0, p1) // calculate the integrals // outer loop: covariates // inner loop: intervals of integrals // TODO: is there a more efficient way to do this to avoid inner loop? for (k=1; k<=nvars; k++){ for (i=1; i<=8; i++){ int_segs[i,1] = analyint(limits[i, 1], limits[i,2], coef[i,2], coef[i,1], *pchipcoefs[1,k]) } // combine intervals of integration // NOTE: rowshape converts (1 \ ... \ 8) -> (1, 2 \ 3, 4 \ ...). // given the order returned by `qtl_coef`, `qtl_bound`, this // puts the segments of each integral in a row, so rowsum // calculates the full integral qtl_coef[,k] = rowsum(rowshape(int_segs, 4)) } // calculate bounds on CATE if (cate == 1){ cate_bounds = J(2,1,.) control_cov = (0 \ cov) treat_cov = (1 \ cov) cate_bounds[1,1] = qtl_coef[1,] * treat_cov - qtl_coef[2,] * control_cov cate_bounds[2,1] = qtl_coef[3,] * treat_cov - qtl_coef[4,] * control_cov return(cate_bounds) } // calculate bounds on CATT if (catt == 1){ exp_y0_bounds = J(2,1,.) control_cov = (0 \ cov) exp_y0_bounds[1,1] = qtl_coef[4,] * control_cov // lower exp_y0_bounds[2,1] = qtl_coef[2,] * control_cov // upper return(exp_y0_bounds) } } // FUNCTION: Quantile Coefficients // DESCRIPTION: Returns the coeficients used to calculate the // argument to the quantile function in the max/min quantile // functions. These are documented in appendix B of an // earlier unpublished version of MPZ2020. // INPUT: // - c: scalar, c-dependence values // - p0, p1: scalars: propensity scores (and 1 - prop score) // RETURN: matrix of coefficients (2 x 8). Columns: constant, slope. Rows: // There are 8 sets of coefficients, which are the combinations of // the treamtent, lower/upper bound, and the range the quantiles // value falls into. The rows are ordered as they are used in calculations // of the ATE: // 1. t1 , lower, seg1 // 2. ..., ..., seg2 // 3. t0 , upper, seg1 // 4. ..., ..., seg2 // 5. t1 , upper, seg1 // 6. ..., ..., seg2 // 7. t0 , lower, seg1 // 8. ..., ..., seg2 numeric matrix qtl_coef(c, p0, p1) { numeric matrix coef if ((c < p0) & (c < p1)) { coef = (0 , 1 - c/p1 \ - c/p1, 1 + c/p1 \ 0 , 1 + c/p0 \ c/p0 , 1 - c/p0 \ 0 , 1 + c/p1 \ c/p1 , 1 - c/p1 \ 0 , 1 - c/p0 \ - c/p0, 1 + c/p0 ) } else if ((p0 <= c) & (c < p1)) { coef = (0 , 1 - c/p1 \ 1 - 1/p1, 1/p1 \ 0 , 1 + c/p0 \ 1 , 0 \ 0 , 1/p1 \ c/p1 , 1 - c/p1 \ 0 , 0 \ -c/p0 , 1 + c/p0 ) } else if ((p1 <= c) & (c < p0)) { coef = (0 , 0 \ -c/p1 , 1 + c/p1 \ 0 , 1/p0 \ c/p0 , 1 - c/p0 \ 0 , 1 + c/p1 \ 1 , 0 \ 0 , 1 - c/p0 \ 1 - 1/p0, 1/p0 ) } else if ((p0 <= c) & (p1 <= c)){ coef = (0 , 0 \ 1 - 1/p1 , 1/p1 \ 0 , 1/p0 \ 1 , 0 \ 0 , 1/p1 \ 1 , 0 \ 0 , 0 \ 1 - 1/p0 , 1/p0 ) } return(coef) } // FUNCTION: Quatile Bounds // DESCRIPTION: The coefficients returned by qtl_coef depend on ranges // that the quantile falls into. This function returns the corresponding // matrix of lower and upper limits of those ranges for each set of // coefficients. See Annex A of MPL2020 // RETURN: matrix of lower and upper limits. Columns: lower limit, upper limit. // Rows: ordered as in the output of `qtl_coef` numeric matrix qtl_bound(c, p0, p1) { numeric matrix bounds if ((c < p0) & (c < p1)) { bound1 = .5 bound2 = .5 bound3 = .5 bound4 = .5 } else if ((p0 <= c) & (c < p1)) { bound1 = (1 - p1) / (1 - (p1 - c)) bound2 = 1 / (1 + (c/p0)) bound3 = c / (1-(p1 - c)) bound4 = (c/p0) / (1 + c/p0) } else if ((p1 <= c) & (c < p0)) { bound1 = (c/p1) / (1 + c/p1) bound2 = c / (1-(p0 - c)) bound3 = 1 / (1 + (c/p1)) bound4 = (1 - p0) / (1 - (p0 - c)) } else if ((p0 <= c) & (p1 <= c)) { bound1 = p0 bound2 = p0 bound3 = p1 bound4 = p1 } bounds = (0 , bound1 \ bound1 , 1 \ 0 , bound2 \ bound2 , 1 \ 0 , bound3 \ bound3 , 1 \ 0 , bound4 \ bound4 , 1 ) return(bounds) } // ========================================================================= // B.2 PCHIP approximations and integration // ========================================================================= // NOTES: // - This section provides code to construct piecewise cubic hermite // interpolating polynomials (pchip), and calculates interpolated values // and integrals. // FUNCTION: PCHIP All Coefficients // DESCRIPTION: Calculates pchip coefficients for a series of functions // with support on [0,1] given a set of nodes on a common // grid on [a,b], for 0 < a < b < 1 // INPUT: // - xnodes: colvector N x 1, grid of points between [0,1] // - ynodes: matrix N x K, each column are the nodes for a function // (e.g. y[n,k] = f[k](x[n]) // - nvars: ... // RETURN: pointer matrix, 1 x K: each column points to a N x 5 matrix // returned by `pchip` // NOTES: // - Since [a,b] doesn't not cover the full support of the functions, the // approach taken here is to first find the interpolated functions on // [a,b], then find the interpolated values at {0,1} and then recalculate // the pchip coefficients including those points // - TODO: This was in the original code. Is this right? It seems like it // would only make the interpolated function worse to use the // interpolated points at {0,1} pointer matrix pchip_all_coef(xnodes, ynodes, nvars){ pchipcoefs = J(1, nvars, NULL) // calculate all the pchip coefficients excluding 0/1 for (k=1; k<=nvars; k++) { pchipcoefs[1,k] = & (pchip(xnodes, ynodes[,k])) } // calculate the 0/1 nodes by interpolation from the rest of the function coef1 = pchipval(*pchipcoefs[1,1],1) coef0 = pchipval(*pchipcoefs[1,1],0) for (k=2; k<=nvars; k++) { coef1 = (coef1, pchipval(*pchipcoefs[1,k],1)) coef0 = (coef0, pchipval(*pchipcoefs[1,k],0)) } // calcalate all the coefficients given the extended grid xnodes = (0 \ xnodes \ 1) ynodes = (coef0 \ ynodes \ coef1) for (k=1; k<=nvars; k++) { pchipcoefs[1,k] = & (pchip(xnodes, ynodes[,k])) } return(pchipcoefs) } // FUNCTION: PCHIP Coefficients // DESCRIPTION: Calculates coefficients to characterize the piecewise // cubic hermite interpolating polynomial from a grid of nodes. // INPUT: // - x, colvector: a length K grid of function support points // - y, colvector: a length K grid of function values at support grid // RETURN: K x 5 matrix: columns 1-3: coefficients, (b,c,d) as described // in the note below, columns 4-5, values of x,y grids. // NOTES: // - The output of this function characterizes the function P(x) defined // in appendix B of Masten, Poirier and Zhang (2020) // - Rather than store the canonical coefficients of the cubic polynomials, // it is more convenient to coefficients in the following form. Dropping // the grid subscripts so y = y_j, y' = y_{j + 1}, etc., for each interval // there are coefficients (b,c,d) such that P(u) = y + s(d + s(c + sb)), // where s = u - x, for x < u < x'. d is defined in appendix B of MPZ2020 // and calculated by `pchipslopes`. There is an explicit solution for (b,c) // as a function of the x,y grids. // - the b,c coefficients are only used to calculate interpolated values // of the function, via `pchipval`. Calculation of integrals in `analytint` // use only the d coefficients and the grid. real matrix pchip(real colvector x, real colvector y) { real scalar n, nu, k, j real colvector h, delta, d, c, b, which, s n = length(x) h = x[2::n] - x[1::n-1] delta = (y[2::n] - y[1::n-1]) :/ h d = pchipslopes(h, delta) c = (3*delta - 2*d[1::n-1] - d[2::n]) :/ h b = (d[1::n-1] - 2*delta + d[2::n]) :/ (h:^2) pchipcoef = (b,c\.,.) pchipcoef = (pchipcoef,d,y,x) return(pchipcoef) } // FUNCTION: PCHIP slopes // DESCRIPTION: Calculates the d coefficient as defined in the note for // `pchip` // INPUT: // - h: colvector, length K - 1: x_{j+1} - x_j // - delta: colvector, length K - 1: (y_{j+1} - y_j) / (x_{j+1} - x_j) // RETURN: // - d: colvector, length K: defined in MPZ2020 appendix B. (NOTE: this // is the approximate slope, not the exact derivative) real colvector function pchipslopes(real colvector h, real colvector delta) { real scalar n real colvector d, k, w1, w2 n = length(h) + 1 d = J(n, 1, 0) k = 1 :+ select((1::n-2), sign(delta[1::n-2]) :* sign(delta[2::n-1]) :> 0) w1 = 2*h[k] + h[k:-1] w2 = h[k] + 2*h[k:-1] d[k] = (w1 + w2) :/ (w1 :/ delta[k:-1] + w2 :/ delta[k]) d[1] = pchipend(h[1], h[2], delta[1], delta[2]) d[n] = pchipend(h[n-1], h[n-2], delta[n-1], delta[n-2]) return(d) } // FUNCTION: PCHIP end // DESCRIPTION: Calculate the d coefficient as defined in the note for `pchip` // for endpoints in the grid. // INPUT: // - h1, h2: scalars, the first two or last two values of h_j = (x_{j+1} - x_j) // - del1, del2: scalars, the first two or last two values of delta_j = // (y_{j+1} - y_j) / h_j // RETURN: scalar, value of d coefficient at the end point real scalar function pchipend(h1, h2, del1, del2) { real scalar d d = ((2*h1 + h2)*del1 - h1*del2) / (h1 + h2) if (sign(d) != sign(del1)) d = 0 else { if (sign(del1) != sign(del2) & (abs(d) > abs(3*del1))) { d = 3*del1 } } return(d) } // FUNCTION: PCHIP values // DESCRIPTION: Calcalate interpolated points given pchip coefficients // INPUT: // - coef, matrix: pchip coefficients in the form returned by `pchip` // - u, colvector/scalar: values of the support to find interpolated point // RETURN: colvector/scalar: interpolated points. real function pchipval(real matrix coef, real u) { nu = length(u) n = rows(coef) k = J(nu, 1, 1) b = coef[1..(n-1),1] c = coef[1..(n-1),2] d = coef[1..(n-1),3] y = coef[1..(n-1),4] x = coef[,5] if (nu==1) { for (j = 2; j <= n-1; j++) { if (x[j] <= u) k=j } } else { for (j = 2; j <= n-1; j++) { which = select((1::nu), x[j] :<= u) k[which] = J(length(which), 1, j) } } s = u - x[k] return(y[k] + s :* (d[k] + s :* (c[k] + s :* b[k]))) } // FUNCTION: Analytical Integral // DESCRIPTION: Calculate the integral of a PCHIP function with support one // [0,1] // INPUT: // - a, b: scalars, lower and upper bounds of integration // real function analyint(real a, real b, real c, real d, real matrix pchipcoef) { if (c == 0){ // TODO: avoid recalculating each time... return((b - a) * pchipval(pchipcoef, d)) } lower_bound = c*a+d upper_bound = c*b+d difflower = J(rows(pchipcoef),1, lower_bound) - pchipcoef[,5] diffupper = J(rows(pchipcoef),1, upper_bound) - pchipcoef[,5] indexlower = select((1::rows(pchipcoef)), difflower:<=0) indexlower = indexlower[1,1] indexupper = select((1::rows(pchipcoef)), diffupper:>=0) indexupper = indexupper[rows(indexupper),1] if (indexlower == indexupper) { midint =0 } else { yk1 = pchipcoef[indexlower+1::indexupper,4] yk = pchipcoef[indexlower::indexupper-1,4] hk = pchipcoef[indexlower+1::indexupper,5] - pchipcoef[indexlower::indexupper-1,5] dk1 = pchipcoef[indexlower+1::indexupper,3] dk = pchipcoef[indexlower::indexupper-1,3] integral = (1/2)*(yk1 + yk) :* hk - (1/12)*(dk1 - dk) :* (hk):^(2) midint = colsum(integral) } if (indexlower == 1) { leftint=0 } else { y1 = pchipcoef[indexlower,4] y0 = pchipcoef[indexlower-1,4] d1 = pchipcoef[indexlower,3] d0 = pchipcoef[indexlower-1,3] h0 = pchipcoef[indexlower,5] - pchipcoef[indexlower-1,5] x0 = pchipcoef[indexlower-1,5] u0 = lower_bound - x0 piece1 = (y1/(h0^2)) * u0^3 - (y1/(2*h0^3))*u0^4 + y0*u0 - (y0/h0^2)*u0^3 + (y0/(2*h0^3))*u0^4 piece2 = (d1/h0^2)*( (1/4)*u0^4 - (h0/3)*u0^3 ) + (d0/h0^2)*( (1/4)*u0^4 - (2*h0/3)*u0^3 + ((h0^2)/2)*u0^2 ) leftint = (1/2)*(y1+y0)*h0 - (1/12)*(d1-d0)*h0^2 - (piece1 + piece2) } if (indexupper == rows(pchipcoef)) { rightint=0 } else { y1 = pchipcoef[indexupper+1,4] y0 = pchipcoef[indexupper,4] d1 = pchipcoef[indexupper+1,3] d0 = pchipcoef[indexupper,3] h0 = pchipcoef[indexupper+1,5] - pchipcoef[indexupper,5] x0 = pchipcoef[indexupper,5] u0 = upper_bound - x0 piece1 = (y1/(h0^2)) * u0^3 - (y1/(2*h0^3))*u0^4 + y0*u0 - (y0/h0^2)*u0^3 + (y0/(2*h0^3))*u0^4 piece2 = (d1/h0^2)*( (1/4)*u0^4 - (h0/3)*u0^3 ) + (d0/h0^2)*( (1/4)*u0^4 - (2*h0/3)*u0^3 + ((h0^2)/2)*u0^2 ) rightint = piece1 + piece2 } analyint = (1/c) * (leftint + midint + rightint) return(analyint) } // ========================================================================= // B.3 Utilities // ========================================================================= // FUNCTION: breakdown_start_points // DESCRIPTION: calculates the initial points from the c_grid to use // in the bisection algorithm to find the breakdown point. // INPUTS: // - c_table: (matrix): col 1 - c-dep vals, col 2/3 - lower/upper bounds // - bd_y: (scalar): value for conclusion stat > bd_y // - side: "lower" ("upper"): at c = 0, stat > (<) bd_y // OUTPUT: saves stata macros clow and chi. These are the values of c such // that y(clow) < bd_y < y(chi) where y(.) is the lower or upper // bound as specified by side void breakdown_start_points(c_table, bd_y, side){ // collect all the values in cgrid before one of the bounds crosses bd_y // + determine the ordering of c-dep vals: if side = "lower" ("upper") // than y(c_max) > (<) y_bd. if (side == "lower") { index = selectindex((st_matrix(c_table)[,2]:>bd_y)) c1 = "chi" c2 = "clow" } else if (side == "upper") { index = selectindex((st_matrix(c_table)[,3]:= B) :* A + (B :> A) :* B return(D) } // FUNCTION: maximum 3 // DESCRIPTION: calculate elementwise maximum of three vectors function max3(A,B,C) { D = max2(A,B) E = max2(B,C) F = max2(D,E) return(F) } // FUNCTION: Apply Cumulative Function // DESCRIPTION: applies a mata vector function to rows 1:k for each row k // for a specified column of the matrix. // INPUT: // - matname (string): name of the stata matrix to operate on // - col (integer): column index to operate on // - f (pointer to function): function to apply cumulatively // RETURN: void, modifies column of input Stata matrix void apply_cumfunc(string scalar matname, real scalar col, pointer(real scalar function) scalar f){ mat = st_matrix(matname) for (row = 1; row <= rows(mat); row++) { mat[row, col] = (*f)(mat[|1, col \ row, col|]) } st_replacematrix(matname, mat) } // FUNCTION: Sort Stata matrix // DESCRIPTION: sort a Stata matrix in ascending order on one column // INPUT: // - matname, string: name of stata matrix to operate one // - sortcol, scalar: number of column to sort on // RETURN: void, replace input Stata matrix with sorted matrix void sort_st_matrix(string scalar matname, real scalar sortcol){ orig_mat = st_matrix(matname) perm = order(orig_mat, sortcol) sort_mat = orig_mat[perm,] row_l = st_matrixrowstripe(matname) sort_row_l = row_l[perm,] st_replacematrix(matname, sort_mat) st_matrixrowstripe(matname, sort_row_l) } // FUNCTION: Matrix Equal // DESCRIPTION: Check if two stata matrices are equal // INPUT: // - a,b: stata matrices to compare // - out: string: name of local to save result to // RETURN: save stata macro named `out` (1 = equal) void mat_eq(a, b, out){ st_local(out, strofreal(st_matrix(a) == st_matrix(b))) } end