/*=========================================================================== * example_malesky.do - Basic DID with Repeated Cross-Sectional Data * * Replication of Malesky et al. (2014) analysis using diddesign * * Data Description: * Repeated cross-sectional data from Vietnam communes (2006, 2008, 2010). * Treatment: Abolition of elected councils (implemented in 2009). * Study examines the effect on local public services. * * Variables: * - id_district: District identifier (string, needs encoding) * - year: Year (2006, 2008, 2010) * - treatment: Treated commune indicator (0/1) * - pro4: Education and Cultural Program indicator [outcome] * - tapwater: Tap water availability indicator [outcome] * - agrext: Agricultural extension center indicator [outcome] * - lnarea, lnpopden, city, reg8: Control variables * * Key Notes: * - This is REPEATED CROSS-SECTIONAL data, NOT panel data * - Use post() instead of id() for RCS data * - post() identifies repeated cross-section data; explicit rcs is optional * - Cluster standard errors at district level * * Reference: * Malesky, Nguyen, and Tran (2014). "The Impact of Recentralization on * Public Services: A Difference-in-Differences Analysis of the Abolition * of Elected Councils in Vietnam." American Political Science Review. *===========================================================================*/ version 16 clear all set more off capture log close local outdir "output" capture mkdir "`outdir'" log using "`outdir'/example_malesky.log", replace /*--------------------------------------------------------------------------- * Section 1: Data Loading and Exploration *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 1: DATA LOADING AND EXPLORATION" di as txt _dup(70) "=" _n capture noisily use malesky2014, clear if _rc != 0 { display as error "Error: malesky2014.dta is not installed" display as error "Please run: net install diddesign, from(...)" exit _rc } // Basic data description di as txt "Data Description:" describe // Summary statistics for key variables di as txt _n "Summary Statistics for Key Variables:" summarize pro4 tapwater agrext treatment year // Check data structure - this is RCS data, not panel di as txt _n "Data Structure (Repeated Cross-Section):" di as txt "Note: Each year contains different observations, not a panel" tabulate year treatment // Check post-treatment indicator (treatment happened in 2009) // Note: post_treat already exists in the dataset di as txt _n "Post-Treatment Indicator:" di as txt "Using existing post_treat variable (1 if year == 2010)" tabulate year post_treat // Encode district identifier for clustering di as txt _n "Encoding District Identifier for Clustering..." encode id_district, gen(id_cluster_num) label variable id_cluster_num "Numeric district identifier" // Create region fixed effects dummies di as txt _n "Creating Region Fixed Effects Dummies..." quietly tabulate reg8, gen(reg8_) // Check covariate availability di as txt _n "Covariate Summary:" summarize lnarea lnpopden city reg8 /*--------------------------------------------------------------------------- * Section 2: Parallel Trends Assessment *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 2: PARALLEL TRENDS ASSESSMENT" di as txt _dup(70) "=" _n // Set random seed for reproducibility set seed 1234 // Placebo test: lag=1 (comparing 2006-2008 pre-treatment trends) // ===================================================== // Test 1: pro4 (Education and Cultural Program) // ===================================================== di as txt _n "=== Testing pro4 (Education and Cultural Program) ===" di as txt "Running parallel trends test (nboot=200 for speed)..." diddesign_check pro4, /// treatment(treatment) time(year) /// post(post_treat) /// lag(1) /// cluster(id_cluster_num) /// nboot(200) // Store results matrix pro4_placebo = e(placebo) local pro4_est = pro4_placebo[1,4] local pro4_se = pro4_placebo[1,5] local pro4_eqci_lb = pro4_placebo[1,6] local pro4_eqci_ub = pro4_placebo[1,7] local pro4_eqci_radius = max(abs(`pro4_eqci_lb'), abs(`pro4_eqci_ub')) di as txt _n "pro4 Parallel Trends Test:" di as txt " Estimate(raw): " %9.4f `pro4_est' di as txt " Std. Error(raw): " %9.4f `pro4_se' di as txt " EqCI95: [" %9.4f `pro4_eqci_lb' ", " %9.4f `pro4_eqci_ub' "]" // ===================================================== // Test 2: tapwater (Tap Water) // ===================================================== di as txt _n "=== Testing tapwater (Tap Water) ===" di as txt "Running parallel trends test..." diddesign_check tapwater, /// treatment(treatment) time(year) /// post(post_treat) /// lag(1) /// cluster(id_cluster_num) /// nboot(200) matrix tapwater_placebo = e(placebo) local tapwater_est = tapwater_placebo[1,4] local tapwater_se = tapwater_placebo[1,5] local tapwater_eqci_lb = tapwater_placebo[1,6] local tapwater_eqci_ub = tapwater_placebo[1,7] local tapwater_eqci_radius = max(abs(`tapwater_eqci_lb'), abs(`tapwater_eqci_ub')) di as txt _n "tapwater Parallel Trends Test:" di as txt " Estimate(raw): " %9.4f `tapwater_est' di as txt " Std. Error(raw): " %9.4f `tapwater_se' di as txt " EqCI95: [" %9.4f `tapwater_eqci_lb' ", " %9.4f `tapwater_eqci_ub' "]" // ===================================================== // Test 3: agrext (Agricultural Center) // ===================================================== di as txt _n "=== Testing agrext (Agricultural Center) ===" di as txt "Running parallel trends test..." diddesign_check agrext, /// treatment(treatment) time(year) /// post(post_treat) /// lag(1) /// cluster(id_cluster_num) /// nboot(200) matrix agrext_placebo = e(placebo) local agrext_est = agrext_placebo[1,4] local agrext_se = agrext_placebo[1,5] local agrext_eqci_lb = agrext_placebo[1,6] local agrext_eqci_ub = agrext_placebo[1,7] local agrext_eqci_radius = max(abs(`agrext_eqci_lb'), abs(`agrext_eqci_ub')) di as txt _n "agrext Parallel Trends Test:" di as txt " Estimate(raw): " %9.4f `agrext_est' di as txt " Std. Error(raw): " %9.4f `agrext_se' di as txt " EqCI95: [" %9.4f `agrext_eqci_lb' ", " %9.4f `agrext_eqci_ub' "]" // Summary Table di as txt _n _dup(70) "-" di as txt "PARALLEL TRENDS ASSESSMENT SUMMARY (raw Estimate/SE + EqCI95)" di as txt _dup(70) "-" di as txt "Variable Estimate(raw) Std.Error(raw) EqCI95(rad) Conclusion" di as txt _dup(70) "-" di as txt "pro4 " %9.4f `pro4_est' " " %9.4f `pro4_se' " " %10.4f `pro4_eqci_radius' " Inspect with plot" di as txt "tapwater " %9.4f `tapwater_est' " " %9.4f `tapwater_se' " " %10.4f `tapwater_eqci_radius' " Inspect with plot" di as txt "agrext " %9.4f `agrext_est' " " %9.4f `agrext_se' " " %10.4f `agrext_eqci_radius' " Inspect with plot" di as txt _dup(70) "-" /*--------------------------------------------------------------------------- * Section 3: Basic DID Estimation (Without Covariates) *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 3: BASIC DID ESTIMATION (NO COVARIATES)" di as txt _dup(70) "=" _n set seed 1234 // ===================================================== // Estimate 1: pro4 // ===================================================== di as txt "=== Estimating effect on pro4 (Education Program) ===" diddesign pro4, /// treatment(treatment) time(year) /// post(post_treat) /// cluster(id_cluster_num) /// nboot(200) // Store results - e(estimates) columns: [lead, estimate, std_error, ci_lo, ci_hi, weight] // Row order: Double-DID (1), DID (2), sDID (3) local pro4_ddid = e(estimates)[1,2] local pro4_did = e(estimates)[2,2] local pro4_sdid = e(estimates)[3,2] local pro4_w_did = e(weights)[1,1] local pro4_w_sdid = e(weights)[1,2] di as txt _n "pro4 Estimation Results:" di as txt " Double-DID: " %9.4f `pro4_ddid' di as txt " DID: " %9.4f `pro4_did' di as txt " sDID: " %9.4f `pro4_sdid' di as txt " Weights: w_DID=" %5.3f `pro4_w_did' ", w_sDID=" %5.3f `pro4_w_sdid' // ===================================================== // Estimate 2: tapwater // ===================================================== di as txt _n "=== Estimating effect on tapwater (Tap Water) ===" diddesign tapwater, /// treatment(treatment) time(year) /// post(post_treat) /// cluster(id_cluster_num) /// nboot(200) local tapwater_ddid = e(estimates)[1,2] local tapwater_did = e(estimates)[2,2] local tapwater_sdid = e(estimates)[3,2] local tapwater_w_did = e(weights)[1,1] local tapwater_w_sdid = e(weights)[1,2] di as txt _n "tapwater Estimation Results:" di as txt " Double-DID: " %9.4f `tapwater_ddid' di as txt " DID: " %9.4f `tapwater_did' di as txt " sDID: " %9.4f `tapwater_sdid' di as txt " Weights: w_DID=" %5.3f `tapwater_w_did' ", w_sDID=" %5.3f `tapwater_w_sdid' // ===================================================== // Estimate 3: agrext // ===================================================== di as txt _n "=== Estimating effect on agrext (Agricultural Center) ===" diddesign agrext, /// treatment(treatment) time(year) /// post(post_treat) /// cluster(id_cluster_num) /// nboot(200) local agrext_ddid = e(estimates)[1,2] local agrext_did = e(estimates)[2,2] local agrext_sdid = e(estimates)[3,2] local agrext_w_did = e(weights)[1,1] local agrext_w_sdid = e(weights)[1,2] di as txt _n "agrext Estimation Results:" di as txt " Double-DID: " %9.4f `agrext_ddid' di as txt " DID: " %9.4f `agrext_did' di as txt " sDID: " %9.4f `agrext_sdid' di as txt " Weights: w_DID=" %5.3f `agrext_w_did' ", w_sDID=" %5.3f `agrext_w_sdid' /*--------------------------------------------------------------------------- * Section 4: Estimation with Covariates (Paper Replication) *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 4: ESTIMATION WITH COVARIATES (PAPER REPLICATION)" di as txt _dup(70) "=" _n di as txt "Covariates: lnarea, lnpopden, city, reg8 (regional FE)" di as txt "Clustering: id_district level" di as txt "Bootstrap: 200 iterations (use 2000 for publication)" di as txt _n _dup(50) "-" set seed 1234 // ===================================================== // pro4 with covariates // ===================================================== di as txt _n "=== pro4 with covariates ===" diddesign pro4, /// treatment(treatment) time(year) /// post(post_treat) /// covariates("lnarea lnpopden city reg8_2 reg8_3 reg8_4 reg8_5 reg8_6 reg8_7") /// cluster(id_cluster_num) /// nboot(200) local pro4_cov_ddid = e(estimates)[1,2] local pro4_cov_se = e(estimates)[1,3] local pro4_cov_ci_lo = e(estimates)[1,4] local pro4_cov_ci_hi = e(estimates)[1,5] di as txt "pro4 Results (with covariates):" di as txt " Double-DID: " %9.4f `pro4_cov_ddid' di as txt " Std. Error: " %9.4f `pro4_cov_se' di as txt " 95% CI: [" %7.4f `pro4_cov_ci_lo' ", " %7.4f `pro4_cov_ci_hi' "]" // ===================================================== // tapwater with covariates // ===================================================== di as txt _n "=== tapwater with covariates ===" diddesign tapwater, /// treatment(treatment) time(year) /// post(post_treat) /// covariates("lnarea lnpopden city reg8_2 reg8_3 reg8_4 reg8_5 reg8_6 reg8_7") /// cluster(id_cluster_num) /// nboot(200) local tapwater_cov_ddid = e(estimates)[1,2] local tapwater_cov_did = e(estimates)[2,2] local tapwater_cov_sdid = e(estimates)[3,2] local tapwater_cov_se = e(estimates)[1,3] di as txt "tapwater Results (with covariates):" di as txt " Double-DID: " %9.4f `tapwater_cov_ddid' di as txt " DID: " %9.4f `tapwater_cov_did' di as txt " sDID: " %9.4f `tapwater_cov_sdid' di as txt " Note: tapwater has notable pre-trend; Double-DID applies the weaker-assumption correction" // ===================================================== // agrext with covariates // ===================================================== di as txt _n "=== agrext with covariates ===" diddesign agrext, /// treatment(treatment) time(year) /// post(post_treat) /// covariates("lnarea lnpopden city reg8_2 reg8_3 reg8_4 reg8_5 reg8_6 reg8_7") /// cluster(id_cluster_num) /// nboot(200) local agrext_cov_ddid = e(estimates)[1,2] local agrext_cov_did = e(estimates)[2,2] local agrext_cov_sdid = e(estimates)[3,2] di as txt "agrext Results (with covariates):" di as txt " Double-DID: " %9.4f `agrext_cov_ddid' di as txt " DID: " %9.4f `agrext_cov_did' di as txt " sDID: " %9.4f `agrext_cov_sdid' di as txt " Note: Neither EPT nor PTT holds; estimates may be biased" /*--------------------------------------------------------------------------- * Section 5: Visualization *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 5: VISUALIZATION" di as txt _dup(70) "=" _n // Generate plots for each outcome // pro4 - Trends plot di as txt "Generating plots for pro4..." set seed 1234 diddesign_check pro4, /// treatment(treatment) time(year) /// post(post_treat) lag(1) cluster(id_cluster_num) nboot(200) diddesign_plot, type(trends) saving("`outdir'/malesky_pro4_trends.png") replace // tapwater - Trends plot di as txt "Generating plots for tapwater..." set seed 1234 diddesign_check tapwater, /// treatment(treatment) time(year) /// post(post_treat) lag(1) cluster(id_cluster_num) nboot(200) diddesign_plot, type(trends) saving("`outdir'/malesky_tapwater_trends.png") replace // agrext - Trends plot di as txt "Generating plots for agrext..." set seed 1234 diddesign_check agrext, /// treatment(treatment) time(year) /// post(post_treat) lag(1) cluster(id_cluster_num) nboot(200) diddesign_plot, type(trends) saving("`outdir'/malesky_agrext_trends.png") replace di as txt _n "Plots saved: output/malesky_*_trends.png" /*--------------------------------------------------------------------------- * Section 6: Results Summary *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 6: RESULTS SUMMARY" di as txt _dup(70) "=" _n di as txt "Parallel Trends Assessment (lag=1, raw scale):" di as txt _dup(64) "-" di as txt "Variable Est(raw) SE(raw) EqCI95 LB EqCI95 UB" di as txt _dup(64) "-" di as txt "pro4 " %9.4f `pro4_est' " " %9.4f `pro4_se' " " %9.4f `pro4_eqci_lb' " " %9.4f `pro4_eqci_ub' di as txt "tapwater " %9.4f `tapwater_est' " " %9.4f `tapwater_se' " " %9.4f `tapwater_eqci_lb' " " %9.4f `tapwater_eqci_ub' di as txt "agrext " %9.4f `agrext_est' " " %9.4f `agrext_se' " " %9.4f `agrext_eqci_lb' " " %9.4f `agrext_eqci_ub' di as txt _dup(64) "-" di as txt _n "Treatment Effects (lead=0, with covariates -- Double-DID):" di as txt _dup(40) "-" di as txt "pro4 " %9.4f `pro4_cov_ddid' di as txt "tapwater " %9.4f `tapwater_cov_ddid' di as txt "agrext " %9.4f `agrext_cov_ddid' di as txt _dup(40) "-" di as txt _n "Paper reference (Table 2, Figure 4):" di as txt " pro4: Double-DID = 0.082; EPT is plausible" di as txt " tapwater: Double-DID = -0.119; EPT is questionable (positive pre-trend)" di as txt " agrext: pre-trends are inconsistent; causal estimate is not credible" /*--------------------------------------------------------------------------- * Section 7: Exporting Results *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 7: EXPORTING RESULTS" di as txt _dup(70) "=" _n // Create summary matrix matrix results_summary = J(3, 6, .) matrix rownames results_summary = "pro4" "tapwater" "agrext" matrix colnames results_summary = "PT_est" "PT_EqCI95" "DID" "sDID" "DoubleDID" "Preferred" // Fill in results (using stored locals) matrix results_summary[1,1] = `pro4_est' matrix results_summary[1,2] = `pro4_eqci_radius' matrix results_summary[1,3] = `pro4_did' matrix results_summary[1,4] = `pro4_sdid' matrix results_summary[1,5] = `pro4_ddid' matrix results_summary[1,6] = `pro4_ddid' // Preferred: DoubleDID matrix results_summary[2,1] = `tapwater_est' matrix results_summary[2,2] = `tapwater_eqci_radius' matrix results_summary[2,3] = `tapwater_did' matrix results_summary[2,4] = `tapwater_sdid' matrix results_summary[2,5] = `tapwater_ddid' matrix results_summary[2,6] = `tapwater_ddid' // Preferred: Double-DID under weaker PTT-based logic matrix results_summary[3,1] = `agrext_est' matrix results_summary[3,2] = `agrext_eqci_radius' matrix results_summary[3,3] = `agrext_did' matrix results_summary[3,4] = `agrext_sdid' matrix results_summary[3,5] = `agrext_ddid' matrix results_summary[3,6] = . // No credible estimate di as txt "Results Summary Matrix:" matrix list results_summary, format(%9.4f) // LaTeX table output di as txt _n "LaTeX-Ready Table (example output format):" di as txt "" di as txt "\begin{table}[htbp]" di as txt "\centering" di as txt "\caption{Parallel Trends Assessment and Treatment Effects}" di as txt "\label{tab:malesky_results}" di as txt "\begin{tabular}{lccccc}" di as txt "\hline\hline" di as txt "Outcome & PT Est. & EqCI95 radius & DID & sDID & Double-DID \\" di as txt "\hline" di as txt "Education Program & " %6.3f `pro4_est' " & " %5.3f `pro4_eqci_radius' " & " %6.3f `pro4_did' " & " %6.3f `pro4_sdid' " & " %6.3f `pro4_ddid' " \\" di as txt "Tap Water & " %6.3f `tapwater_est' " & " %5.3f `tapwater_eqci_radius' " & " %6.3f `tapwater_did' " & " %6.3f `tapwater_sdid' " & " %6.3f `tapwater_ddid' " \\" di as txt "Agricultural Center & " %6.3f `agrext_est' " & " %5.3f `agrext_eqci_radius' " & " %6.3f `agrext_did' " & " %6.3f `agrext_sdid' " & " %6.3f `agrext_ddid' " \\" di as txt "\hline\hline" di as txt "\multicolumn{6}{l}{\footnotesize Notes: Bootstrap standard errors clustered at district level.} \\" di as txt "\end{tabular}" di as txt "\end{table}" // Export to CSV preserve clear set obs 3 gen outcome = "" gen pt_estimate = . gen pt_eqci95_radius = . gen did = . gen sdid = . gen double_did = . gen preferred = "" replace outcome = "pro4" in 1 replace pt_estimate = `pro4_est' in 1 replace pt_eqci95_radius = `pro4_eqci_radius' in 1 replace did = `pro4_did' in 1 replace sdid = `pro4_sdid' in 1 replace double_did = `pro4_ddid' in 1 replace preferred = "Double-DID" in 1 replace outcome = "tapwater" in 2 replace pt_estimate = `tapwater_est' in 2 replace pt_eqci95_radius = `tapwater_eqci_radius' in 2 replace did = `tapwater_did' in 2 replace sdid = `tapwater_sdid' in 2 replace double_did = `tapwater_ddid' in 2 replace preferred = "Double-DID" in 2 replace outcome = "agrext" in 3 replace pt_estimate = `agrext_est' in 3 replace pt_eqci95_radius = `agrext_eqci_radius' in 3 replace did = `agrext_did' in 3 replace sdid = `agrext_sdid' in 3 replace double_did = `agrext_ddid' in 3 replace preferred = "None" in 3 export delimited using "`outdir'/malesky_results.csv", replace di as txt _n "Results exported to output/malesky_results.csv" restore /*--------------------------------------------------------------------------- * Section 8: Comparison with Paper Values *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt "SECTION 8: COMPARISON WITH PAPER VALUES" di as txt _dup(70) "=" _n di as txt "Expected values from Paper (Table 2 and Figure 4):" di as txt "" di as txt "Parallel Trends Test (lag=1):" di as txt " pro4: Estimate=-0.007, SE=0.096" di as txt " tapwater: Estimate=0.166, SE=0.083" di as txt " agrext: Estimate=0.198, SE=0.082" di as txt "" di as txt "Treatment Effects (lead=0):" di as txt " pro4: DID=0.084, sDID=0.087, Double-DID=0.082" di as txt " tapwater: DID=-0.078, sDID=-0.119, Double-DID=-0.043" di as txt " agrext: No credible estimator of the ATT without stronger assumptions" di as txt "" di as txt "Note: The paper values are reference targets from the article." di as txt " This walkthrough prints the public diddesign_check contract:" di as txt " raw placebo estimate/SE plus standardized EqCI95." /*--------------------------------------------------------------------------- * End of Example *---------------------------------------------------------------------------*/ di as txt _n _dup(70) "=" di as txt _n "MALESKY EXAMPLE COMPLETED SUCCESSFULLY" di as txt _dup(70) "=" _n log close