/*===========================================================================
* example_kdid.do - Generalized K-DID Examples
*
* Demonstrates the K-DID extension from Appendix E of Egami & Yamauchi (2023).
* Uses synthetic data to illustrate when and how K>2 components help.
*
* Contents:
*   Part 1: Basic K-DID — no confounding (all k agree)
*   Part 2: Linear confounding — k=2,3 correct, k=1 biased
*   Part 3: Quadratic confounding — k=3 correct, k=1,2 biased
*   Part 4: J-test moment selection
*   Part 5: SA K-DID with Paglayan (2019) data
*
* Key concepts:
*   - kmax(K) combines K component estimators via GMM
*   - k=1: standard parallel trends (constant confounding)
*   - k=2: parallel trends-in-trends (linear confounding)
*   - k=3: 2nd-order parallel trends (quadratic confounding)
*   - jtest(on): Hansen J-test for adaptive moment selection
*
* Reference:
*   Egami, N. & Yamauchi, S. (2023). Using Multiple Pretreatment Periods
*   to Improve Difference-in-Differences and Staggered Adoption Designs.
*   Political Analysis 31(2): 195-212.
*===========================================================================*/

version 16
clear all
set more off

/*---------------------------------------------------------------------------
* Part 1: Basic K-DID — No Confounding
*
* Simulated panel: Y = 5 + 1.5*Gi + 0.3*t + ATT*Gi*1(t>=0) + eps
* True ATT = 2.0
* The treated and control groups share the same time trend, so each of the
* components k=1,2,3 should recover ATT ≈ 2.0.
*---------------------------------------------------------------------------*/

display as text _newline _dup(70) "="
display as text "PART 1: BASIC K-DID — NO CONFOUNDING (ATT = 2.0)"
display as text _dup(70) "=" _newline

set seed 2024
local N = 400
local T = 7
local n_obs     = `N' * `T'
local n_treated = `N' / 2

* Balanced panel: T rows per unit; the first N/2 units are treated from t=0.
quietly set obs `n_obs'
quietly generate id = ceil(_n/`T')
quietly bysort id: generate t = _n - 5      // event time: -4,-3,-2,-1,0,1,2
quietly generate Gi = (id <= `n_treated')
quietly generate treatment = Gi * (t >= 0)
quietly generate year = 2010 + t
quietly generate Y = 5 + 1.5*Gi + 0.3*t + 2.0*Gi*(t>=0) + rnormal(0, 0.5)

display as text "Data: `N' units, `T' periods (-4 to +2), treatment at t=0"
display as text "DGP: Y = 5 + 1.5*Gi + 0.3*t + 2.0*Gi*1(t>=0) + N(0, 0.5)"
display as text "True ATT = 2.0, no confounding" _newline

* Baseline run without kmax(): the backward-compatible K=2 Double DID.
display as text "--- kmax(2): Standard Double DID ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(200) seed(42)

* Same data, now requesting three components (generalized K-DID).
display as text _newline "--- kmax(3): Generalized K-DID ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(200) seed(42) kmax(3)

display as text _newline "Note: Under no confounding, all three components (k=1,2,3)"
display as text "should produce similar estimates near the true ATT = 2.0."

/*---------------------------------------------------------------------------
* Part 2: Linear Confounding
*
* DGP: Y = 5 + 1.5*Gi + 0.3*t + 0.5*Gi*t + ATT*Gi*1(t>=0) + eps
* The 0.5*Gi*t term creates linear time-varying confounding.
* True ATT = 1.0
*
* Expected behavior:
*   k=1 (DID):  BIASED — violates parallel trends
*   k=2 (sDID): Unbiased — accounts for linear confounding
*   k=3:        Unbiased — also accounts for linear confounding
*---------------------------------------------------------------------------*/

di as txt _n _dup(70) "="
di as txt "PART 2: LINEAR CONFOUNDING (ATT = 1.0)"
di as txt _dup(70) "=" _n

* Start from an empty dataset and reseed so this part is reproducible on
* its own.
clear
set seed 2024
local N = 400
local T = 7

quietly {
    * Balanced panel: T consecutive rows per unit, event time t = -4..2.
    set obs `=`N'*`T''
    gen id = ceil(_n/`T')
    bysort id: gen t = _n - 5
    * First half of the units form the treated group; treatment starts at t=0.
    gen Gi = (id <= `=`N'/2')
    gen treatment = Gi * (t >= 0)
    gen year = 2010 + t
    * 0.5*Gi*t is the group-specific linear trend (the confounder);
    * 1.0*Gi*(t>=0) is the treatment effect.
    gen Y = 5 + 1.5*Gi + 0.3*t + 0.5*Gi*t + 1.0*Gi*(t>=0) + rnormal(0, 0.5)
}

di as txt "DGP: Y = 5 + 1.5*Gi + 0.3*t + 0.5*Gi*t + 1.0*Gi*1(t>=0) + eps"
di as txt "True ATT = 1.0, linear confounding (0.5*Gi*t)" _n

di as txt "--- kmax(3): K-DID under linear confounding ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(200) seed(42) kmax(3)

di as txt _n "Expected: k=1 is biased (estimate > 1.0),"
di as txt " k=2 and k=3 are approximately unbiased."

/*---------------------------------------------------------------------------
* Part 3: Quadratic Confounding
*
* DGP: Y = 5 + 1.5*Gi + 0.3*t + 0.3*Gi*t + 0.15*Gi*t^2
*          + ATT*Gi*1(t>=0) + eps
* The 0.15*Gi*t^2 term creates quadratic time-varying confounding.
* True ATT = 1.0
*
* What to expect from each component:
*   k=1 (DID):  BIASED
*   k=2 (sDID): BIASED — a linear correction cannot absorb the t^2 term
*   k=3:        Unbiased — accounts for quadratic confounding
*---------------------------------------------------------------------------*/

display as text _newline _dup(70) "="
display as text "PART 3: QUADRATIC CONFOUNDING (ATT = 1.0)"
display as text _dup(70) "=" _newline

clear
set seed 2024
local N = 600
local T = 7
local n_obs     = `N' * `T'
local n_treated = `N' / 2

* Balanced panel: T rows per unit, event time -4..2, first half treated.
quietly set obs `n_obs'
quietly generate id = ceil(_n/`T')
quietly bysort id: generate t = _n - 5
quietly generate Gi = (id <= `n_treated')
quietly generate treatment = Gi * (t >= 0)
quietly generate year = 2010 + t
* Group-specific linear and quadratic trends, plus the treatment effect.
quietly generate Y = 5 + 1.5*Gi + 0.3*t      ///
    + 0.3*Gi*t + 0.15*Gi*t^2                 ///
    + 1.0*Gi*(t>=0) + rnormal(0, 0.5)

display as text "DGP: Y = 5 + 1.5*Gi + 0.3*t + 0.3*Gi*t + 0.15*Gi*t^2"
display as text " + 1.0*Gi*1(t>=0) + eps"
display as text "True ATT = 1.0, quadratic confounding (0.15*Gi*t^2)" _newline

display as text "--- kmax(3): K-DID under quadratic confounding ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(200) seed(42) kmax(3)

display as text _newline "Expected: k=1 and k=2 are biased,"
display as text " k=3 is approximately unbiased (removes quadratic trend)."
display as text "This is the key advantage of using K>2 pre-treatment periods."

/*---------------------------------------------------------------------------
* Part 4: J-test Moment Selection
*
* Under strong linear confounding, the J-test should detect that k=1
* (standard parallel trends) is violated and drop it, keeping k=2,3.
*---------------------------------------------------------------------------*/

di as txt _n _dup(70) "="
di as txt "PART 4: J-TEST MOMENT SELECTION"
di as txt _dup(70) "=" _n

clear
set seed 2024
local N = 600
local T = 7

quietly {
    * Balanced panel: T rows per unit, event time t = -4..2, first half treated.
    set obs `=`N'*`T''
    gen id = ceil(_n/`T')
    bysort id: gen t = _n - 5
    gen Gi = (id <= `=`N'/2')
    gen treatment = Gi * (t >= 0)
    gen year = 2010 + t
    * 1.0*Gi*t is the strong group-specific linear trend; noise sd is 0.3.
    gen Y = 5 + 1.5*Gi + 0.3*t + 1.0*Gi*t + 1.0*Gi*(t>=0) + rnormal(0, 0.3)
}

di as txt "DGP: Strong linear confounding (1.0*Gi*t)"
di as txt "True ATT = 1.0" _n

di as txt "--- Without J-test: kmax(3) ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(300) seed(42) kmax(3)

* Keep the estimates from the run without the J-test so they can be shown
* next to the J-test run below. The read is guarded in the same way as the
* other e() matrices in this part, so a missing result cannot abort the
* example.
capture confirm matrix e(estimates)
local have_nojtest = (_rc == 0)
if `have_nojtest' {
    matrix est_nojtest = e(estimates)
}

di as txt _n "--- With J-test: kmax(3) jtest(on) ---"
diddesign Y, treatment(treatment) time(year) id(id) ///
    nboot(300) seed(42) kmax(3) jtest(on)

di as txt _n "The J-test adaptively selects which moment conditions to use."
di as txt "Under strong linear confounding, it should drop k=1 (standard PT)"
di as txt "and retain k=2,3 (which require weaker assumptions)."

* Side-by-side view of the two runs (matrix list leaves e() untouched).
capture confirm matrix e(estimates)
if _rc == 0 & `have_nojtest' {
    matrix est_jtest = e(estimates)
    di as txt _n "Estimates without J-test:"
    matrix list est_nojtest, noheader
    di as txt _n "Estimates with J-test:"
    matrix list est_jtest, noheader
}

* Report the number of components before/after selection, if posted.
* NOTE(review): the column positions [1,1] and [1,3] are taken from the
* labels printed here — confirm against diddesign's stored-results docs.
capture confirm matrix e(k_summary)
if _rc == 0 {
    matrix ks = e(k_summary)
    di as txt _n "K_init = " ks[1,1] ", K_final = " ks[1,3]
}

* Report the J statistic and its p-value, if posted (same caveat on layout).
capture confirm matrix e(jtest_stats)
if _rc == 0 {
    matrix js = e(jtest_stats)
    di as txt "J-statistic = " %6.3f js[1,1] ", p-value = " %6.4f js[1,3]
}

/*---------------------------------------------------------------------------
* Part 5: SA K-DID with Paglayan (2019) Data
*
* The Paglayan dataset has ~40 years of panel data with staggered adoption.
* Each treatment cohort has many pre-treatment periods, enabling SA K-DID.
*---------------------------------------------------------------------------*/

di as txt _n _dup(70) "="
di as txt "PART 5: STAGGERED ADOPTION K-DID (PAGLAYAN 2019)"
di as txt _dup(70) "=" _n

* Locate the example dataset. Try the working directory first; if that
* fails, search the adopath with findfile (the file may have been installed
* alongside the package). The return code is kept in a local so later
* commands cannot disturb the decision.
capture use paglayan2019, clear
local rc_load = _rc
if `rc_load' != 0 {
    capture findfile paglayan2019.dta
    if _rc == 0 {
        capture use "`r(fn)'", clear
        local rc_load = _rc
    }
}

if `rc_load' != 0 {
    di as err "paglayan2019.dta not found; skipping Part 5"
}
else {
    * Outcome: log of per-pupil expenditure (+1 guards against log(0)).
    gen log_expenditure = log(pupil_expenditure + 1)
    * diddesign is given a numeric unit id, so encode the string state name.
    encode state, gen(id_subject)

    di as txt "Data: US states, 1959-2000, staggered adoption of"
    di as txt " collective bargaining requirements for teachers" _n

    * Standard SA (kmax=2)
    di as txt "--- SA design, kmax(2): Standard SA Double DID ---"
    diddesign log_expenditure, treatment(treatment) id(id_subject) time(year) ///
        design(sa) thres(1) nboot(200) seed(42)

    * SA K-DID (kmax=3)
    di as txt _n "--- SA design, kmax(3): SA K-DID ---"
    diddesign log_expenditure, treatment(treatment) id(id_subject) time(year) ///
        design(sa) thres(1) nboot(200) seed(42) kmax(3)

    di as txt _n "The SA K-DID extends the basic SA Double DID by allowing for"
    di as txt "higher-order polynomial time-varying confounding across cohorts."
}

/*---------------------------------------------------------------------------
* Summary
*---------------------------------------------------------------------------*/

di as txt _n "WHEN TO USE K-DID:" _newline ///
    " kmax(2) — Default. Handles constant and linear confounding." _newline ///
    " kmax(3) — Use with 3+ pre-periods for quadratic confounding." _newline ///
    " jtest(on) — Adaptive moment selection via Hansen J-test." _newline ///
    " design(sa) kmax(3) — SA K-DID for staggered adoption."