/* ———— Code for Paper "A Permutation Test for the Regression Kink Design" Peter Ganong and Simon Jaeger ganong@gmail.com, sjaeger@mit.edu Please contact us with feature suggestions and questions! rdpermute (Version 1.0.2) ------------------------------- The function provides an implementation for the Regression Kink Permuation Test described in "A Permutation Test for the Regression Kink Design" Documentation: https://github.com/rdpermute/STATA ------------------------------- Authors: - Peter Ganong (ganong@uchicago.edu) - Simon Jaeger (sjaeger@mit.edu) Dependencies: - rd - rdrobust ------------------------------- References for Code: - Peter Ganong, Simon Jaeger (2017). A Permutation Test for the Regression Kink Design, Journal of the American Statistical Association http://dx.doi.org/10.1080/01621459.2017.1328356 - Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell and Rocio Titiunik (2017). rdrobust: Software for regression-discontinuity Designs STATA package version 0.97. http://www-personal.umich.edu/~cattaneo/papers/Calonico-Cattaneo-Farrell-Titiunik_2017_Stata.pdf https://sites.google.com/site/rdpackages/rdrobust/stata - Nichols, Austin. 2011. rd 2.0: Revised Stata module for regression discontinuity estimation. http://ideas.repec.org/c/boc/bocode/s456888.html Output ---------------- Default matrix output: e(kink_beta_linear) e(kink_se_linear) e(bw_linear) e(pval_linear) e(kink_beta_quad) e(kink_se_quad) e(bw_quad) e(pval_quad) With N as number of placebo kinks, matrices kink* and bw* are Nx1. Matrices pval* are 2 x 1. Row 1 is asymptotic pvalue. Row 2 is randomization pvalue. Optional .dta output: collapses all of the above into a single file. Annotations --------------- - To avoid the unindented manipulation of Variables in the Dataset, we use the prefix rdpermute_ for all Variables defined in following code. */ cap program drop rdpermute program define rdpermute, eclass version 13.0 syntax varlist [, /// placebo_disconts(numlist) /// true_discont(string) /// position_true_discont(integer -1) /// deriv_discont(integer 1) /// bw(string) /// linear /// quad /// cubic /// skip_install /// filename(string) /// save_path(string) /// dgp(string) /// bw_manual(real 1) /// fg_bandwidth_scaling(numlist) /// fg_bias_porder(integer 4) /// fg_f_0(real 0) /// fg_density_porder(integer 3) /// fg_num_bins(integer 50) /// cct_bw_par(string) /// cct_reg_par(string) /// silent /// ] preserve *Format for console if ("`silent'" == "") { local noi = "noisily" `noi' di "" `noi' di "__________________________________________________" di "Operating in verbose mode. Add option to suppress intermediate output." } ************* *** SET DEFAULT VALUES FOR STRINGS ********* if("`bw'" == ""){ local bw = "fg_aic" } if("`bw'" == "cct"){ local reg = "cct" } else { local reg = "regress" } ***************** ***CHECK WE HAVE NEEDED ADOs, SET PARAMETERS AND MATRICES*** ***************** if("`placebo_disconts'" == ""){ di as error "Error: The required parameter placebo_disconts is missing. Please specify a numlist of placebo points." exit } if(missing(real("`true_discont'"))){ di as error "Error: The required parameter true_discont is missing. Please specify the location of the true discont as a single numeric point." exit } *review installed packages if("`skip_install'" != ""){ local required_ados "rdrobust.ado rd.ado" //add the required ados here// foreach x of local required_ados { capture findfile `x' if _rc==601 { di "Trying to install `x'" net install rdrobust, from("https://sites.google.com/site/rdpackages/rdrobust/stata") replace net get rdrobust, replace } } } *review required inputs local true_discont_in_list = 0 local counter = 1 foreach kink of numlist `placebo_disconts'{ if (`kink' == `true_discont'){ local true_discont_in_list = 1 scalar true_discont_index = `counter' } local ++counter } if (`true_discont_in_list' == 0 & `position_true_discont' >= 0){ local true_discont_in_list = 1 scalar true_discont_index = `position_true_discont' } if (`true_discont_in_list' == 0){ di as error "Error: true_discont is `true_discont' which is not in the list of placebo discontinuities `placebo_disconts'. If this is Error occurs due to the binary representation you can use additionally the optional parameter position_true_discont." exit } *review optional inputs if (`deriv_discont' != 0 & `deriv_discont' != 1){ di as error "Error: Must specify option deriv=0 for Regression Discontinuity or deriv=1 for Regression Kink." exit } else if (`deriv_discont' == 0){ qui `noi' di "Evaluating placebo regression discontinuities" } else if (`deriv_discont' == 1){ qui `noi' di "Evaluating placebo regression kinks" } if (!inlist("`bw'" ,"cct" ,"fg" ,"manual" ,"fg_aic")){ di as error "Error: Bandwidth choice must be , , or ." exit } qui { if "`bw'" == "fg_aic" & `deriv_discont' == 1{ `noi' di "Using AIC to choose polynomial order for bias estimate in FG bw choice" } else if "`bw'" == "fg" & `deriv_discont' == 1{ `noi' di "Using polynomial order of `fg_bias_porder' for bias estimate in FG bw choice" } else if ("`bw'" == "fg" ||"`bw'" == "fg_aic") & `deriv_discont' == 0{ `noi' di "Using Imbens and Kalyanaraman bandwidth selection." } if ("`linear'" == "" & "`quad'" == "" & "`cubic'" == "") { local linear = "linear" local quad = "quad" local cubic = "cubic" } if ("`bw'" == "fg" || "`bw'" == "fg_aic") & `deriv_discont' == 0 & ("`quad'" != "" || "`cubic'" != "") { di as error "Our code currently does not allow FG bw selection for local quadradtic or cubic models within a regression discontinuity design." exit } } tokenize "`varlist'" local y `1' local x `2' local var_linear = "rdpermute_kink_x_1 rdpermute_x_0 rdpermute_x_1" local var_quad = "rdpermute_kink_x_1 rdpermute_x_0 rdpermute_x_1 rdpermute_kink_x_2 rdpermute_x_2" local var_cubic = "rdpermute_kink_x_1 rdpermute_x_0 rdpermute_x_1 rdpermute_kink_x_2 rdpermute_x_2 rdpermute_kink_x_3 rdpermute_x_3" *Generate storage for output local kinks_n : list sizeof local(placebo_disconts) foreach p in `linear' `quad' `cubic' { mat bw_`p' = J(`kinks_n',1,.) mat kink_beta_`p' = J(`kinks_n',1,.) mat kink_se_`p' = J(`kinks_n',1,.) mat pval_`p' = J(2,1,.) } qui { ****************** *EVALUATE PLACEBO KINKS ****************** set more off local percentage_last = 0 local kink_counter = 1 *Progress bar initialization `noi' di as text "" `noi' di as text "Progress:" `noi' di as text "|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|....| " `noi' di as text "|" _continue foreach kink of numlist `placebo_disconts' { *Progress bar update local total_num: word count `placebo_disconts' local percentage = round(`kink_counter' / `total_num' * 100) if (`percentage' > `percentage_last' ){ local times = `percentage' - `percentage_last' `noi' di as text _dup(`times') "*" _continue local percentage_last = `percentage' } *Calculation of values for each kink destring `x', replace destring `y', replace cap drop rdpermute_* gen rdpermute_y = `y' gen rdpermute_x_0 = 1 gen rdpermute_x_1 = `x'-`kink' gen rdpermute_kink_x_1 = rdpermute_x_1 * (rdpermute_x_1 >= 0) gen rdpermute_kink_x_2 = rdpermute_x_1^2 * (rdpermute_x_1 >= 0) gen rdpermute_discont_x_1 = (rdpermute_x_1 >= 0) gen rdpermute_kink_x_3 = rdpermute_x_1^3 * (rdpermute_x_1 >= 0) gen rdpermute_x_2 = rdpermute_x_1^2 gen rdpermute_x_3 = rdpermute_x_1^3 ****************** *BANDWIDTH SELECTION FOR EACH KINK ****************** if ("`bw'" == "fg" | "`bw'" == "fg_aic" ) { *Fan and Gijbels -- used by Card, Lee, Pei and Weber if `deriv_discont' == 1 { if ("`bw'" == "fg_aic") { *Prepare Variables cap drop rdpermute_x_2 cap drop rdpermute_x_3 qui reg rdpermute_y rdpermute_kink_x_1 rdpermute_x_1 , noconst estat ic matrix define AIC = r(S) local aic_p_order = 1 local aic_min = AIC[1,5] *Add successively one variable more to the model forvalues p = 2/20 { gen rdpermute_x_`p' = rdpermute_x_1^`p' matrix accum X_T_X = rdpermute_kink_x_1 rdpermute_x_* , noconst matrix symeig Vectors lambda = X_T_X *Calculate eigenvector for inversion control mata: st_matrix("minabslambda",min(abs(st_matrix("lambda")))) if minabslambda[1,1]<0.01 { *If inversion might get unstable turn models off, * that need at least the current p if (`p' == 4){ local cubic_off="off" } if (`p' == 3){ local quad_off="off" } continue, break } else{ *Save only model with minimal AIC estat ic matrix define AIC = r(S) local AIC_can = AIC[1,5] if (`AIC_can' < `aic_min' ) { local aic_min = AIC[1,5] local aic_p_order = `p' } } } local fg_opt_p_order = max(`deriv_discont'+1, `aic_p_order') forvalues p = 2(1)20 { cap drop rdpermute_x_`p' } if ("`quad_off'"=="off" & "`quad'"=="quad"){ `noi' di as error "Warning: Encountered numerical instabilities at a placebo discont. A possible reason is sparse data within the neighbourhood of a placebo discont. rdpermute will not compute the values for a quadratic model assumption, as the p-values might get random due to automatic omitting of variables by stata." local quad ="" } if ("`cubic_off'"=="off" & "`cubic'"=="cubic"){ `noi' di as error "Warning: Encountered numerical instabilities at a placebo discont. A possible reason is sparse data within the neighbourhood of a placebo discont. rdpermute will not compute the values for a cubic model assumption, as the p-values might get random due to automatic omitting of variables by stata." local cubic ="" } } * Use manual fg_opt_p_order for fg else { local fg_opt_p_order = `fg_bias_porder' } * Generate all neccessary variables forvalues p = 2 / `fg_opt_p_order' { cap gen rdpermute_x_`p' = rdpermute_x_1^`p' } qui reg rdpermute_y rdpermute_x_* rdpermute_kink_x_1, noconst local sigma_hat_0 = e(rss) / e(df_r) local m_hat_2_0 = 2*(_b[rdpermute_x_2]) if ( "`quad'" == "quad"){ cap gen rdpermute_x_3 = rdpermute_x_1^3 qui reg rdpermute_y rdpermute_x_* rdpermute_kink_x_1, noconst local m_hat_3_0 = 6*(_b[rdpermute_x_3]) } if ( "`cubic'" == "cubic"){ cap gen rdpermute_x_4 = rdpermute_x_1^4 qui reg rdpermute_y rdpermute_x_* rdpermute_kink_x_1, noconst local m_hat_4_0 = 24*(_b[rdpermute_x_4]) } cap drop rdpermute_bin_var rdpermute_rdpermute_f_* rdpermute_n_obs *Calculation of fg_f_0 if not specified* if (`fg_f_0' == 0) { sum rdpermute_x_1 local min_x = r(min) local max_x = r(max) local step_size = (r(max)-r(min))/`fg_num_bins' gen rdpermute_bin_var = . forvalues mu = 1/`fg_num_bins' { replace rdpermute_bin_var = `mu' if rdpermute_x_1>=(`min_x'+(`mu'-1)*`step_size')&rdpermute_x_1<(`min_x'+(`mu')*`step_size') } bys rdpermute_bin_var: egen rdpermute_n_obs = count(rdpermute_x_1) gen rdpermute_f_discrete = rdpermute_n_obs/_N forvalues p = 0/`fg_density_porder' { gen rdpermute_f_x_`p' = rdpermute_x_1^`p' } qui reg rdpermute_f_discrete rdpermute_f_x_*, noconst local f_hat_0 = `fg_num_bins' * _b[rdpermute_f_x_0] / (`max_x'-`min_x') } else { local f_hat_0 = `fg_f_0' } local h_linear = 2.35 * ([`sigma_hat_0'/((`m_hat_2_0')^2*`f_hat_0')]^(1/5))*_N^(-1/5) if ( "`quad'" !=""){ local h_quad = 3.93 * ([`sigma_hat_0'/((`m_hat_3_0')^2*`f_hat_0')]^(1/7))*_N^(-1/7) } if ( "`cubic'" != ""){ local h_cubic = 3.93 * ([`sigma_hat_0'/((`m_hat_4_0')^2*`f_hat_0')]^(1/9))*_N^(-1/9) } if ("`fg_bandwidth_scaling'"!=""){ local number1 : word 1 of `fg_bandwidth_scaling' local number2 : word 2 of `fg_bandwidth_scaling' local h_linear = `number1' * ([`sigma_hat_0' / ((`m_hat_2_0')^2 * `f_hat_0')]^`number2') * _N^`number2' if ( "`quad'" !=""){ local h_quad = `number1' * ([`sigma_hat_0' / ((`m_hat_3_0')^2 * `f_hat_0')]^`number2') * _N^`number2' } if ( "`cubic'" !=""){ local h_cubic = `number1' * ([`sigma_hat_0' / ((`m_hat_4_0')^2 * `f_hat_0')]^`number2') * _N^`number2' } } } *Imbens and Kalyanaraman -- based on Fan and Gijbels if `deriv_discont' == 0 { if ("`linear'"~="") { qui rd rdpermute_y rdpermute_x_1, kernel(triangle) local h_linear = e(w) } } } if ("`bw'" == "manual") { local h_linear = `bw_manual' local h_quad = `bw_manual' local h_cubic = `bw_manual' } ****************** *RUN REGRESSIONS ****************** if (`deriv_discont' == 0) { local rdvar = "rdpermute_discont_x_1" local estimand = "rdpermute_discont_x_1" } else if (`deriv_discont' == 1) { local estimand = "rdpermute_kink_x_1" } if ("`reg'" == "regress" ) { foreach p in `linear' `quad' `cubic' { qui reg rdpermute_y `var_`p'' `rdvar' if abs(rdpermute_x_1)< `h_`p'', robust noconstant mat bw_`p'[`kink_counter',1] = `h_`p'' mat kink_beta_`p'[`kink_counter',1] = _b[`estimand'] mat kink_se_`p'[`kink_counter',1] = _se[`estimand'] } } else if ("`reg'" == "cct") { foreach p in `linear' `quad' `cubic'{ if ("`p'" == "linear") { local p_n = 1 local q_n = 2 } else if ("`p'" == "quad") { local p_n = 2 local q_n = 3 } else if ("`p'" == "cubic") { local p_n = 3 local q_n = 4 } local reg_kernel = "triangular" local reg_bwselect = "mserd" local reg_vce = "nn 3" *set alternative parameters for rdrobust if ("`cct_reg_par'" != ""){ foreach par1name in "reg_c" "reg_fuzzy" "reg_covs" "reg_kernel" "reg_weights" "reg_bwselect" "reg_vce" "reg_b" { local from = strpos("`cct_reg_par'", "<"+"`par1name'"+">") + strlen("`par1name'") +2 local to = strpos("`cct_reg_par'", "") - `from' if(`to' != 0){ local `par1name' = substr("`cct_reg_par'", `from' , `to') } } } rdrobust rdpermute_y rdpermute_x_1, p(`p_n') q(`q_n') deriv(`deriv_discont') kernel("`reg_kernel'") bwselect("`reg_bwselect'") b("`reg_b'") vce("`reg_vce'") fuzzy("`reg_fuzzy'") covs("`reg_covs'") weights("`reg_weights'") mat bw_`p'[`kink_counter',1] = e(h_l) mat kink_beta_`p'[`kink_counter',1] = e(tau_bc) mat kink_se_`p'[`kink_counter',1] = e(se_tau_rb) } } local ++kink_counter } ****************** *COMPUTE PVALUES ****************** foreach p in `linear' `quad' `cubic' { `noi' di as result "" `noi' di as result "pvalues for local `p' model" *asymptotic SEs local t = kink_beta_`p'[true_discont_index,1]/kink_se_`p'[true_discont_index,1] mat pval_`p'[1,1] = 2*(1-normal(abs(`t'))) `noi' di as result "Bandwidth at true discontinuity using `bw': " bw_`p'[true_discont_index,1] `noi' di as result "Coef at true discontinuity is " kink_beta_`p'[true_discont_index,1] local tmp_pval = round(pval_`p'[1,1], 0.0001) if ( `tmp_pval' == 0) { local tmp_pval = "<.0001" } `noi' di as result "p-value asymptotic: " "`tmp_pval'" *randomization-based SEs clear qui svmat kink_beta_`p' egen rank_tmp_pos = rank(kink_beta_`p'1) scalar pval = 2*min(rank_tmp_pos[true_discont_index]/`kinks_n',1-((rank_tmp_pos[true_discont_index])-1)/`kinks_n') `noi' di as result "p-value random: " pval mat pval_`p'[2,1] = pval mat list pval_`p' } } ****************** *SUMMARY OUTPUT ****************** foreach mat_stub in kink_beta kink_se bw pval { foreach p in `linear' `quad' `cubic' { *save .dta output if "`filename'"!=""{ clear qui svmat `mat_stub'_`p' cap gen dgp = "`dgp'" cap gen kink_location = _n qui save "`save_path'`mat_stub'_`p'`dgp'", replace } *save matrix output ereturn matrix `mat_stub'_`p' `mat_stub'_`p' } } clear if "`filename'"!=""{ foreach mat_stub in kink_beta kink_se bw pval { foreach p in `linear' `quad' `cubic' { append using "`save_path'`mat_stub'_`p'`dgp'" } } collapse kink_beta* kink_se* bw* pval*, by(kink_location) sort kink_location gen actualKink = _n == true_discont_index qui `noi' save `save_path'`filename', replace } cap drop rdpermute_* qui `noi' di as text "__________________________________________________" restore end