*! _diddesign_sa.ado - Staggered adoption design estimation
*!
*! Implements the staggered adoption (SA) extension of the double DID estimator
*! for settings where treatment timing varies across units. The SA design
*! estimates period-specific ATT at each adoption time t using the double DID
*! framework, then aggregates via time-weighted average:
*!
*!   tau_bar^SA = Sum_t pi_t * tau^SA(t)
*!
*! where pi_t is the proportion of newly treated units at period t. This module
*! serves as the Stata interface for SA estimation, delegating numerical
*! computation to the Mata functions in did_sa.mata.

program define _diddesign_sa, eclass
    version 16.0
    
    // =========================================================================
    // SECTION 1: SYNTAX PARSING
    // =========================================================================
    syntax varlist(min=1 fv) [if] [in], ///
        TREATment(varname)              /// Required: treatment indicator
        ID(varname)                     /// Required for SA: unit identifier
        TIME(varname)                   /// Required for SA: time identifier
        [CLuster(varname)]              /// Cluster variable for SEs
        [COVariates(string asis)]       /// Additional covariates (supports factor variables)
        [COVariatesorig(string asis)]   /// Original covariate specification for e()
        [NBoot(integer 30)]             /// Bootstrap iterations (default: 30)
        [LEAD(numlist >=0 integer)]     /// Lead values for SA design
        [THRes(integer 2)]              /// SA threshold (default: 2)
        [LEVEL(cilevel)]                /// Confidence level (default: 95)
        [SEED(integer -1)]              /// Random seed (-1 = not specified)
        [PARALlel]                      /// Use parallel computing
        [SEBoot]                        /// Use bootstrap SE/CI
        [QUIET]                         /// Suppress progress display
        [KMAX(integer 2)]               /// Max K-DID components (default: 2)
        [JTEST(string)]                 /// J-test moment selection: "on" or "off"
        TOUSE(varname)                  /// Sample marker from main program
        [IDORIG(string)]                /// Original id variable name for ereturn
        [TIMEORIG(string)]              /// Original time variable name for ereturn
        [CLUSTERORIG(string)]           /// Original cluster variable name for ereturn
    
    // Read command line from global macro
    local cmdline "$DIDDESIGN_CMDLINE"
    
    // =========================================================================
    // SECTION 2: SET DEFAULTS
    // =========================================================================
    // Parse outcome from varlist
    gettoken outcome rest : varlist
    local covariates_inline = "`rest'"
    local all_covariates = "`covariates_inline' `covariates'"
    local all_covariates = strtrim("`all_covariates'")
    
    // -------------------------------------------------------------------------
    // Duplicate Covariate Check
    // -------------------------------------------------------------------------
    // Remove duplicate covariates when combining inline and covariates() option
    
    if "`all_covariates'" != "" {
        local unique_covars : list uniq all_covariates
        local n_all : word count `all_covariates'
        local n_unique : word count `unique_covars'
        
        if `n_unique' < `n_all' {
            // Find duplicate variables by comparing original and unique lists
            local dups ""
            local seen ""
            foreach v of local all_covariates {
                local is_seen : list v in seen
                if `is_seen' {
                    local is_dup : list v in dups
                    if !`is_dup' {
                        local dups "`dups' `v'"
                    }
                }
                else {
                    local seen "`seen' `v'"
                }
            }
            local dups = strtrim("`dups'")
            display as text "Warning: Duplicate covariates detected and removed: `dups'"
            local all_covariates "`unique_covars'"
        }
    }

    local covariates_spec "`all_covariates'"
    if "`covariatesorig'" != "" {
        local covariates_spec = strtrim("`covariatesorig'")
    }
    
    // -------------------------------------------------------------------------
    // Factor Variable Expansion
    // -------------------------------------------------------------------------
    // Expand factor variables (i.var, ibn.var) into dummy variables
    // Base category is excluded to avoid collinearity with the intercept
    
    tempvar sa_cov_complete_map
    quietly gen byte `sa_cov_complete_map' = 1 if `touse'

    if "`all_covariates'" != "" {
        quietly _diddesign_expand_covariates, covars(`all_covariates') touse(`touse')
        local all_covariates "`r(varlist)'"
        local generated_covariates "`r(generated_vars)'"
        local encoded_string_covariates "`r(encoded_sources)'"
        local n_fv_expanded = r(n_factor_expanded)

        foreach covar_name of local encoded_string_covariates {
            display as text "Note: String factor covariate `covar_name' automatically encoded to numeric"
        }

        quietly markout `sa_cov_complete_map' `all_covariates', strok

        if `n_fv_expanded' > 0 {
            display as text "Note: Factor variables expanded to `n_fv_expanded' dummy variables (base/constant columns excluded)"
        }
    }

    // Treat outcome-missing unit-time cells as unavailable before enforcing the
    // SA balanced-panel contract. Otherwise markout-only gaps can bypass the
    // public guard and silently shift adoption timing in later Mata steps.
    quietly markout `touse' `outcome'
    
    // Set default values
    if "`lead'" == "" local lead = "0"
    local requested_lead "`lead'"
    local nboot_val = `nboot'
    local thres_val = `thres'
    local level_val = `level'
    local n_lead_requested : word count `requested_lead'
    
    // Set default original variable names if not provided
    if "`idorig'" == "" local idorig "`id'"
    if "`timeorig'" == "" local timeorig "`time'"
    if "`clusterorig'" == "" local clusterorig "`cluster'"
    
    // Cluster defaults to id if not specified
    if "`cluster'" == "" {
        local cluster_var ""
        // Also set clusterorig to idorig if cluster was not specified
        if "`clusterorig'" == "" local clusterorig "`idorig'"
    }
    else {
        local cluster_var = "`cluster'"
    }
    
    // Seed handling
    if `seed' == -1 {
        local seed_val = .
    }
    else {
        local seed_val = `seed'
    }
    
    // Quiet option
    local quiet_val = 0
    if "`quiet'" != "" {
        local quiet_val = 1
    }
    
    // Parallel option
    local parallel_val = ("`parallel'" != "")
    local parallel_actually_used = 0
    local n_workers_used = 0
    local n_boot_attempted = `nboot'
    
    // seboot option
    local seboot_val = ("`seboot'" != "")
    
    // =========================================================================
    // SECTION 3: DATA PREPARATION
    // =========================================================================
    // SA design requires balanced panel structure where each unit is observed
    // across all time periods. The treatment timing matrix G_{it} classifies
    // each unit-period as: newly treated (1), not-yet-treated control (0), or
    // previously treated (-1). Valid periods must have at least 'thres' newly
    // treated units to ensure reliable estimation.
    // -------------------------------------------------------------------------
    tempvar sa_obs_order
    quietly gen long `sa_obs_order' = _n if `touse'
    
    // Count observations and units
    quietly count if `touse'
    local N = r(N)
    
    // Count unique units
    tempvar unit_tag
    quietly egen `unit_tag' = tag(`id') if `touse'
    quietly count if `unit_tag' == 1 & `touse'
    local n_units = r(N)
    
    // Count unique time periods
    tempvar time_tag
    quietly egen `time_tag' = tag(`time') if `touse'
    quietly count if `time_tag' == 1 & `touse'
    local n_periods = r(N)

    // Guard against duplicated id() x time() cells before constructing SA
    // cohort timing. The current SA implementation assumes exactly one
    // observation per unit-period in the estimation sample.
    tempvar sa_dup_cell
    quietly bysort `id' `time': gen byte `sa_dup_cell' = (_N > 1) if `touse'
    quietly count if `sa_dup_cell' == 1 & `touse'
    if r(N) > 0 {
        display as error "E003: SA design requires a balanced panel with one observation per id() x time() cell"
        display as error "      Found duplicated id() x time() cells in the estimation sample"
        display as error "      Resolve missing or duplicated unit-time cells before using design(sa)"
        exit 459
    }

    // Guard against incomplete id() x time() support before constructing SA
    // cohort timing.
    local expected_cells = `n_units' * `n_periods'
    if `N' != `expected_cells' {
        display as error "E003: SA design requires a balanced panel with one observation per id() x time() cell"
        display as error "      Found `N' observations but expected `expected_cells' from `n_units' units x `n_periods' periods"
        display as error "      Resolve missing or duplicated unit-time cells before using design(sa)"
        exit 459
    }
    
    // SA DID/sDID estimation uses {t-2, t-1, t} windows, so at least three
    // distinct time periods must be present before any SA period is estimable.
    if `n_periods' < 3 {
        display as error "E008: SA design requires at least 3 time periods"
        display as error "      Found only `n_periods' time period(s)"
        display as error "      SA design needs two pre-treatment periods and one treatment period"
        exit 198
    }
    
    // =========================================================================
    // SECTION 4: VALIDATE TREATMENT VARIABLE
    // =========================================================================
    // SA design requires an absorbing (cumulative) binary treatment indicator:
    //   - Binary: D_{it} in {0, 1} for all observations
    //   - Absorbing: once treated, units remain treated (D_{it} = 1 => D_{is} = 1
    //     for all s > t)
    // This structure enables identification of treatment adoption timing A_i,
    // defined as the first period where D_{it} = 1. The treatment timing matrix
    // G_{it} is then constructed based on A_i to classify unit-period cells.
    // -------------------------------------------------------------------------
    
    // Preserve the user-facing treatment name for e() metadata.
    local treatment_orig "`treatment'"

    // Canonicalize treatment to exact 0/1 before validating absorbing paths.
    tempvar treatment_work
    quietly gen double `treatment_work' = . if `touse'
    quietly replace `treatment_work' = 0 if abs(`treatment') < 1e-6 & `touse'
    quietly replace `treatment_work' = 1 if abs(`treatment' - 1) < 1e-6 & `touse'

    quietly count if missing(`treatment_work') & `touse'
    if r(N) > 0 {
        display as error "E007: Treatment variable must be binary (0/1)"
        display as error "      Found `r(N)' observations outside the 1e-6 tolerance around 0/1"
        display as error "      SA design requires cumulative binary treatment indicator"
        exit 459
    }

    quietly count if `treatment_work' == 1 & `touse'
    if r(N) == 0 {
        display as error "E007: Treatment variable must contain both 0 and 1 values"
        display as error "      SA design requires cumulative binary treatment indicator"
        exit 459
    }

    quietly count if `treatment_work' == 0 & `touse'
    if r(N) == 0 {
        display as error "E007: Treatment variable must contain both 0 and 1 values"
        display as error "      SA design requires cumulative binary treatment indicator"
        exit 459
    }

    local treatment "`treatment_work'"
    
    // Check cumulative treatment (absorbing treatment)
    // Treatment must only transition from 0 to 1, never decrease
    tempvar treat_lag treat_diff
    quietly {
        bysort `id' (`time'): gen `treat_lag' = `treatment'[_n-1] if `touse'
        gen `treat_diff' = `treatment' - `treat_lag' if `touse' & `treat_lag' != .
        count if `treat_diff' < 0 & `touse'
    }
    if r(N) > 0 {
        display as error "E003: Treatment variable must be cumulative (absorbing)"
        display as error "      Found `r(N)' observations with treatment decreasing over time"
        display as error "      SA design requires treatment to only transition from 0 to 1"
        exit 459
    }

    // The validation bysorts above should not leak label-dependent ordering
    // into the Mata bootstrap path. Restore the caller's sample order before
    // _did_sa_prepare_data() reads the data with st_data().
    quietly sort `sa_obs_order'
    
    // =========================================================================
    // SECTION 5: CALL MATA SA ESTIMATION
    // =========================================================================
    // Time-average SA-ATT (Staggered Adoption ATT):
    //
    //   tau_bar^SA = Sum_{t in T} pi_t * tau^SA(t)
    //
    // where:
    //   - pi_t = n_{1t} / Sum_{t'} n_{1t'} is the time weight (proportion of
    //     newly treated units at period t)
    //   - tau^SA(t) is the period-specific double DID estimate combining
    //     tau^SA_DID(t) and tau^SA_sDID(t) via GMM optimal weighting
    //   - Estimation uses three consecutive periods {t-2, t-1, t} per period t
    //
    // GMM weight matrix W = Omega^{-1} minimizes variance under heteroskedasticity.
    // Bootstrap variance estimation resamples units (not observations) with
    // replacement, recomputing all period-specific estimates in each iteration.
    // -------------------------------------------------------------------------
    
    // Set random seed if specified for bootstrap reproducibility
    if `seed_val' != . {
        set seed `seed_val'
    }
    
    // Convert lead numlist to Mata format
    local lead_mata = subinstr("`lead'", " ", ", ", .)
    local n_lead : word count `lead'
    
    // Prepare data in Mata
    mata: st_local("mata_rc", strofreal(_did_sa_prepare_data("`outcome'", "`treatment'", "`id'", "`time'", ///
                               "`cluster_var'", "`all_covariates'", "`touse'")))

    if "`generated_covariates'" != "" {
        capture drop `generated_covariates'
    }
    
    if `mata_rc' != 0 {
        display as error "E011: SA data preparation failed"
        display as error "      No valid observations selected for analysis"
        exit 498
    }
    
    // Parse kmax and jtest for SA K-DID
    local kmax_val = max(1, `kmax')
    local jtest_val = lower("`jtest'")
    if "`jtest_val'" == "" | "`jtest_val'" == "off" {
        local jtest_on = 0
    }
    else if "`jtest_val'" == "on" {
        local jtest_on = 1
    }
    else {
        display as error "E020: jtest() must be 'on' or 'off'"
        exit 198
    }
    // kmax=1: route to K-DID path (single moment = pure SA-DID)
    // kmax=2: stay on original K=2 SA Double-DID path (backward compatible)
    // kmax>2: use generalized K-DID path
    local use_sa_kdid_path = (`kmax_val' != 2)

    // Route: K-DID SA path or standard K=2 path
    if `use_sa_kdid_path' {
        // Generalized K-DID SA path (kmax != 2)
        mata: st_local("mata_rc", strofreal( ///
            _did_sa_main_k((`lead_mata'), `nboot_val', `thres_val', `level_val', `kmax_val', `jtest_on', `quiet_val')))
    }
    else if `parallel_val' == 1 {
        // Parallel SA bootstrap path:
        // 1. Compute point estimate on original data (needed for GMM weights)
        // 2. Run parallel bootstrap via coordinator
        // 3. Aggregate results via _did_sa_main_from_boot()

        // Set did_opt fields required by sa_double_did() (not set by _did_sa_prepare_data)
        mata: did_opt.thres = `thres_val'
        mata: did_opt.lead = (`lead_mata')
        mata: did_opt.n_boot = `nboot_val'
        mata: did_opt.level = `level_val'
        mata: did_opt.quiet = `quiet_val'

        // Compute SA point estimate and store as Mata external for later GMM
        mata: _par_sa_point_est = sa_double_did(did_dat, did_opt)

        // Build coordinator option string
        local parallel_route_opts "outcome(`outcome') treatment(`treatment')"
        local parallel_route_opts "`parallel_route_opts' id(`id') time(`time')"
        local parallel_route_opts "`parallel_route_opts' nboot(`nboot_val') lead(`lead')"
        local parallel_route_opts "`parallel_route_opts' thres(`thres_val') level(`level_val')"
        local parallel_route_opts "`parallel_route_opts' design(sa) touse(`touse')"
        local parallel_route_opts "`parallel_route_opts' ispanel(1) seboot(`seboot_val')"
        if `seed_val' != . {
            local parallel_route_opts "`parallel_route_opts' seed(`seed_val')"
        }
        if "`cluster_var'" != "" {
            local parallel_route_opts "`parallel_route_opts' cluster(`cluster_var')"
        }
        if "`all_covariates'" != "" {
            local parallel_route_opts "`parallel_route_opts' covariates(`all_covariates')"
        }
        if "`quiet'" != "" {
            local parallel_route_opts "`parallel_route_opts' quiet"
        }

        capture noisily _diddesign_parallel_boot, `parallel_route_opts'
        local par_rc = _rc
        local parallel_actually_used = r(parallel_used)
        local n_workers_used = r(n_workers)

        if `par_rc' != 0 & `parallel_actually_used' != 0 {
            exit `par_rc'
        }

        if `parallel_actually_used' == 1 {
            // Parallel succeeded: load combined bootstrap matrix into Mata
            local boot_combined_file "`r(boot_combined)'"
            local boot_tmpdir_path "`r(boot_tmpdir)'"
            local n_boot_success_par = r(n_boot_success)
            local n_boot_attempted = r(n_boot_attempted)

            // Transfer boot results to Mata external _par_boot_est
            preserve
            qui use "`boot_combined_file'", clear

            local boot_ncols = 0
            foreach v of varlist _boot_col* {
                local ++boot_ncols
            }

            if `boot_ncols' > 0 & _N > 0 {
                unab boot_cols : _boot_col*
                putmata _par_boot_est = (`boot_cols'), replace
                restore
                capture erase "`boot_combined_file'"
                capture rmdir "`boot_tmpdir_path'"

                // Run SA GMM pipeline from pre-collected bootstrap
                mata: st_local("mata_rc", strofreal( ///
                    _did_sa_main_from_boot((`lead_mata'), `level_val', `quiet_val')))
            }
            else {
                restore
                capture erase "`boot_combined_file'"
                capture rmdir "`boot_tmpdir_path'"
                display as text "Note: Parallel bootstrap produced no valid results; falling back to sequential."
                local parallel_actually_used = 0
                mata: st_local("mata_rc", strofreal( ///
                    _did_sa_main((`lead_mata'), `nboot_val', `thres_val', `level_val', `quiet_val')))
            }
        }
        else {
            // Graceful degradation to sequential
            mata: st_local("mata_rc", strofreal( ///
                _did_sa_main((`lead_mata'), `nboot_val', `thres_val', `level_val', `quiet_val')))
        }
    }
    else {
        // Sequential bootstrap (existing Mata path, unchanged)
        mata: st_local("mata_rc", strofreal(_did_sa_main((`lead_mata'), `nboot_val', `thres_val', `level_val', `quiet_val')))
    }
    
    if `mata_rc' != 0 {
        // Provide specific error messages based on error code
        if `mata_rc' == 1 {
            display as error "E011: SA estimation failed - could not create treatment timing matrix (Gmat)"
        }
        else if `mata_rc' == 2 {
            display as error "E011: SA estimation failed - no valid periods found"
            display as error "      Try reducing the threshold value (thres option)"
        }
        else if `mata_rc' == 3 {
            display as error "E011: SA estimation failed - point estimation returned missing values"
        }
        else if `mata_rc' == 4 {
            display as error "E011: SA estimation failed - insufficient valid bootstrap iterations"
            display as error "      Try increasing the number of bootstrap iterations (nboot option)"
        }
        else {
            display as error "E011: SA estimation failed in Mata (error code: `mata_rc')"
        }
        exit 498
    }
    
    // =========================================================================
    // SECTION 6: RETRIEVE RESULTS FROM MATA
    // =========================================================================
    // Transfer estimation metadata from Mata global scalars to Stata locals
    mata: st_local("n_periods_valid", strofreal(_sa_n_periods_valid))
    mata: st_local("n_boot_success", strofreal(_sa_n_boot_success))
    
    // =========================================================================
    // SECTION 7: STORE e() RETURNS
    // =========================================================================
    // Transfer estimation results from Mata to Stata e() class for post-estimation
    // commands. Results include: coefficient vector (b), variance matrix (V),
    // detailed estimates table, GMM weight matrix (W), and time weights (pi_t).
    // -------------------------------------------------------------------------
    
    tempname b_mat V_mat estimates_mat lead_mat weights_mat W_mat vcov_gmm_mat bootstrap_support_mat time_weights_mat time_weight_period_idx_mat time_weight_periods_mat time_weights_by_lead_mat
    
    mata: st_matrix("`b_mat'", _sa_b)
    mata: st_matrix("`V_mat'", _sa_V)
    mata: st_matrix("`estimates_mat'", _sa_estimates)
    mata: st_matrix("`lead_mat'", _sa_lead_values)
    mata: st_matrix("`weights_mat'", _sa_weights)
    mata: st_matrix("`W_mat'", _sa_W)
    mata: st_matrix("`vcov_gmm_mat'", _sa_vcov_gmm)
    mata: st_matrix("`bootstrap_support_mat'", _sa_bootstrap_support)
    mata: st_matrix("`time_weights_mat'", _sa_time_weights)
    mata: st_matrix("`time_weight_period_idx_mat'", _sa_time_weight_period_idx)
    mata: st_matrix("`time_weights_by_lead_mat'", _sa_time_weights_by_lead)

    tempvar touse_map
    quietly gen byte `touse_map' = `touse'
    
    // Validate result matrices exist and are non-empty
    capture confirm matrix `lead_mat'
    if _rc != 0 {
        display as error "Error: SA estimation produced no valid results (lead_mat not found)"
        exit 498
    }
    if colsof(`lead_mat') == 0 {
        display as error "Error: SA estimation produced no valid results (lead_mat is empty)"
        exit 498
    }
    
    // Reshape flattened W and VCOV matrices to proper 2x2 form for single lead case
    // Mata vec() uses column-major order: [W11, W21, W12, W22] for a 2x2 matrix
    // For multiple leads, matrices remain as n_lead x 4 (each row is one flattened 2x2)
    // K-DID path keeps the n_lead x kmax^2 layout as-is.
    local n_lead = colsof(`lead_mat')
    if !`use_sa_kdid_path' & `n_lead' == 1 {
        // Reconstruct 2x2 GMM weight matrix W = Omega^{-1}
        matrix `W_mat' = (`W_mat'[1,1], `W_mat'[1,3] \ `W_mat'[1,2], `W_mat'[1,4])
        
        // Reconstruct 2x2 variance-covariance matrix Omega of moment conditions
        matrix `vcov_gmm_mat' = (`vcov_gmm_mat'[1,1], `vcov_gmm_mat'[1,3] \ `vcov_gmm_mat'[1,2], `vcov_gmm_mat'[1,4])
    }

    // Single-lead posting still needs the bridge from e(vcov_gmm) into the
    // public 3x3 block. Multi-lead V_mat is fully assembled in Mata on the
    // jointly observed posted bootstrap vector and must not be overwritten here.
    // K-DID path: V_mat diagonal is already filled by _did_sa_main_k; skip bridging.
    if !`use_sa_kdid_path' & `n_lead' == 1 {
        scalar __sa_var_did = `V_mat'[2,2]
        scalar __sa_cov_did_sdid = `vcov_gmm_mat'[1,2]
        scalar __sa_var_sdid = `V_mat'[3,3]
        scalar __sa_w_did = `weights_mat'[1,1]
        scalar __sa_w_sdid = `weights_mat'[1,2]
        scalar __sa_cov_ddid_did = __sa_w_did * __sa_var_did + __sa_w_sdid * __sa_cov_did_sdid
        scalar __sa_cov_ddid_sdid = __sa_w_did * __sa_cov_did_sdid + __sa_w_sdid * __sa_var_sdid

        if !missing(__sa_cov_ddid_did) {
            matrix `V_mat'[1,2] = __sa_cov_ddid_did
            matrix `V_mat'[2,1] = __sa_cov_ddid_did
        }
        if !missing(__sa_cov_ddid_sdid) {
            matrix `V_mat'[1,3] = __sa_cov_ddid_sdid
            matrix `V_mat'[3,1] = __sa_cov_ddid_sdid
        }
        if !missing(__sa_cov_did_sdid) {
            matrix `V_mat'[2,3] = __sa_cov_did_sdid
            matrix `V_mat'[3,2] = __sa_cov_did_sdid
        }
    }
    
    // Set row and column names for e(b)
    local b_names ""
    if `use_sa_kdid_path' {
        foreach l of numlist `lead' {
            local b_names "`b_names' SA_KDID:lead_`l'"
            forvalues kk = 1/`kmax_val' {
                local b_names "`b_names' SA_k`kk':lead_`l'"
            }
        }
    }
    else {
        foreach l of numlist `lead' {
            local b_names "`b_names' SA_dDID:lead_`l' SA_DID:lead_`l' SA_sDID:lead_`l'"
        }
    }
    local b_names = trim("`b_names'")
    matrix colnames `b_mat' = `b_names'
    
    // Set row and column names for e(V)
    matrix rownames `V_mat' = `b_names'
    matrix colnames `V_mat' = `b_names'
    
    // e(b) / e(V) cannot contain missing values. Keep all requested leads in
    // e(estimates), but omit non-estimable coefficients from the posted result.
    tempname b_post V_post
    local post_idx ""
    local post_ddid_n = 0
    local _block_size = 3
    if `use_sa_kdid_path' {
        local _block_size = 1 + `kmax_val'
    }
    forvalues j = 1/`=colsof(`b_mat')' {
        local b_val = el(`b_mat', 1, `j')
        local v_val = el(`V_mat', `j', `j')
        if !missing(`b_val') & !missing(`v_val') {
            local post_idx "`post_idx' `j'"
            if mod(`j' - 1, `_block_size') == 0 {
                local ++post_ddid_n
            }
        }
    }
    local post_idx = trim("`post_idx'")
    local post_ncoef : word count `post_idx'
    
    if `post_ncoef' == 0 {
        display as error "E011: SA estimation failed - no estimable coefficients remain after handling missing components"
        exit 498
    }
    
    local post_idx_mata = subinstr("`post_idx'", " ", ", ", .)
    mata: idx = (`post_idx_mata')
    mata: st_matrix("`b_post'", st_matrix("`b_mat'")[1, idx])
    mata: st_matrix("`V_post'", st_matrix("`V_mat'")[idx, idx])
    mata: mata drop idx
    
    local post_names ""
    foreach idx of local post_idx {
        local cname : word `idx' of `b_names'
        local post_names "`post_names' `cname'"
    }
    local post_names = trim("`post_names'")
    
    matrix colnames `b_post' = `post_names'
    matrix rownames `V_post' = `post_names'
    matrix colnames `V_post' = `post_names'

    local identified_leads ""
    local unidentified_leads ""
    local n_lead_identified 0
    foreach l of numlist `lead' {
        local _lead_found 0
        if `use_sa_kdid_path' {
            local has_final : list posof "SA_KDID:lead_`l'" in post_names
            local has_k1 : list posof "SA_k1:lead_`l'" in post_names
            if `has_final' > 0 | `has_k1' > 0 {
                local _lead_found 1
            }
        }
        else {
            local has_ddid : list posof "SA_dDID:lead_`l'" in post_names
            local has_did : list posof "SA_DID:lead_`l'" in post_names
            local has_sdid : list posof "SA_sDID:lead_`l'" in post_names
            if `has_ddid' > 0 | `has_did' > 0 | `has_sdid' > 0 {
                local _lead_found 1
            }
        }
        if `_lead_found' {
            local identified_leads "`identified_leads' `l'"
            local ++n_lead_identified
        }
        else {
            local unidentified_leads "`unidentified_leads' `l'"
        }
    }
    local identified_leads : list retokenize identified_leads
    local unidentified_leads : list retokenize unidentified_leads

    local posted_lead_pos ""
    local current_n_lead : word count `lead'
    forvalues cur_i = 1/`current_n_lead' {
        local cur_lead : word `cur_i' of `lead'
        local lead_is_identified : list cur_lead in identified_leads
        if `lead_is_identified' {
            local posted_lead_pos "`posted_lead_pos' `cur_i'"
        }
    }
    local posted_lead_pos : list retokenize posted_lead_pos

    tempname lead_posted_mat
    local posted_lead_pos_mata = subinstr("`posted_lead_pos'", " ", ", ", .)
    mata: idx = (`posted_lead_pos_mata')
    mata: st_matrix("`lead_posted_mat'", st_matrix("`lead_mat'")[1, idx])
    mata: mata drop idx

    // Reconstruct the effective SA estimation sample from the valid period
    // support returned by Mata. Each estimable cohort contributes units that are
    // not previously treated at period t and observations in the union of the
    // required pre/post windows used by the requested lead() values.
    tempvar sa_esample sa_unit_tag sa_cluster_tag sa_regress_sample first_treat_period
    quietly gen byte `sa_esample' = 0
    quietly gen byte `sa_regress_sample' = 0
    quietly bysort `id': egen double `first_treat_period' = min(cond(`touse_map' & `treatment' == 1, `time', .))

    local n_tw = rowsof(`time_weights_mat')
    if `n_tw' > 0 {
        quietly levelsof `time' if `touse_map', local(time_levels_support)
        local n_requested_leads : word count `lead'
        forvalues i = 1/`n_tw' {
            local period_idx = el(`time_weight_period_idx_mat', `i', 1)
            local period_pos = int(`period_idx')
            local period_pre1_pos = `period_pos' - 1
            local period_pre2_pos = `period_pos' - 2

            if `period_pre1_pos' < 1 | `period_pre2_pos' < 1 {
                continue
            }

            local period_pre2 : word `period_pre2_pos' of `time_levels_support'
            local period_pre1 : word `period_pre1_pos' of `time_levels_support'
            local period_cur : word `period_pos' of `time_levels_support'

            forvalues lead_idx = 1/`n_requested_leads' {
                local lead_step : word `lead_idx' of `lead'

                if rowsof(`time_weights_by_lead_mat') == `n_tw' {
                    local lead_weight = el(`time_weights_by_lead_mat', `i', `lead_idx')
                    if missing(`lead_weight') | `lead_weight' <= 0 {
                        continue
                    }
                }

                local period_post_pos = `period_pos' + `lead_step'
                local period_post : word `period_post_pos' of `time_levels_support'
                if "`period_post'" == "" {
                    continue
                }

                // Appendix E.3 eligibility for lead s keeps treated units with
                // A_i = t and control units with A_i > t+s (or never treated).
                quietly replace `sa_esample' = 1 if `touse_map' & ///
                    (`first_treat_period' == `period_cur' | missing(`first_treat_period') | `first_treat_period' > `period_post') & ///
                    (`time' == `period_pre2' | `time' == `period_pre1' | `time' == `period_post')

                quietly replace `sa_regress_sample' = 1 if `touse_map' & ///
                    (`first_treat_period' == `period_cur' | missing(`first_treat_period') | `first_treat_period' > `period_post') & ///
                    (`time' == `period_pre1' | `time' == `period_post')
            }
        }
    }

    // Rows with missing outcomes never enter any valid SA DID/sDID component,
    // even if they belong to the raw support window. Exclude them so the
    // posted sample and header reflect observations that can actually
    // contribute to estimation under the current sample contract.
    quietly replace `sa_esample' = 0 if `sa_esample' == 1 & missing(`outcome')

    // Covariates matter only on rows that enter the SA DID/sDID regressions.
    // Keep the broader support window for outcome-based transformations, but do
    // not count support rows that the regression layer will drop for missing X.
    if "`all_covariates'" != "" {
        quietly replace `sa_esample' = 0 if `sa_esample' == 1 & `sa_regress_sample' == 1 & ///
            `sa_cov_complete_map' == 0
    }

    quietly count if `sa_esample' == 1 & `touse_map'
    local N_support = r(N)
    if `N_support' == 0 {
        display as error "E011: SA estimation failed - effective support sample is empty after period filtering"
        exit 498
    }

    quietly egen `sa_unit_tag' = tag(`id') if `sa_esample' == 1 & `touse_map'
    quietly count if `sa_unit_tag' == 1 & `sa_esample' == 1 & `touse_map'
    local n_units_support = r(N)
    local support_cluster_var "`cluster_var'"
    if "`support_cluster_var'" == "" {
        local support_cluster_var "`id'"
    }
    quietly egen `sa_cluster_tag' = tag(`support_cluster_var') if `sa_esample' == 1 & `touse_map'
    quietly count if `sa_cluster_tag' == 1 & `sa_esample' == 1 & `touse_map'
    local n_clusters_support = r(N)

    // Bootstrap inference is defined on treatment-assignment blocks. If the
    // final SA support sample collapses to fewer than two clusters after the
    // same Appendix E.3 eligibility and missing-value filtering used for
    // estimation, fail closed instead of posting degenerate near-zero SEs.
    if `n_clusters_support' < 2 {
        display as error "E003: At least 2 clusters are required for bootstrap inference"
        display as error "      Found only `n_clusters_support' unique cluster in the final SA support sample"
        exit 198
    }

    // Preserve the caller's dataset order after the post-Mata bysort/egen
    // support reconstruction above so repeated diddesign calls in the same
    // session do not inherit label-dependent row reordering.
    quietly sort `sa_obs_order'

    local N = `N_support'
    local n_units = `n_units_support'
    
    // Post filtered b and V matrices with sample marker
    ereturn post `b_post' `V_post', esample(`sa_esample') obs(`N') depname("`outcome'")
    
    // --- Scalars ---
    ereturn scalar n_units = `n_units'
    ereturn scalar n_periods = `n_periods'
    ereturn scalar n_periods_valid = `n_periods_valid'
    ereturn scalar n_boot = `nboot_val'
    ereturn scalar n_clusters = `n_clusters_support'
    ereturn scalar level = `level_val'
    ereturn scalar n_lead = `n_lead_identified'
    ereturn scalar n_lead_requested = `n_lead_requested'
    ereturn scalar n_lead_filtered = 0
    ereturn scalar n_lead_identified = `n_lead_identified'
    ereturn scalar thres = `thres_val'
    ereturn scalar is_panel = 1
    ereturn scalar seboot = `seboot_val'
    ereturn scalar kmax = `kmax_val'
    ereturn scalar jtest_on = `jtest_on'
    ereturn scalar parallel = `parallel_actually_used'
    if `parallel_actually_used' {
        ereturn scalar n_workers = `n_workers_used'
        ereturn scalar n_boot_attempted = `n_boot_attempted'
    }
    
    // Always expose bootstrap success counts for auditability.
    if "`n_boot_success'" != "" & "`n_boot_success'" != "." {
        ereturn scalar n_boot_success = `n_boot_success'
    }
    
    // --- Macros ---
    ereturn local cmd "diddesign"
    ereturn local cmdline "`cmdline'"
    ereturn local design "sa"
    ereturn local depvar "`outcome'"
    ereturn local treatment "`treatment_orig'"
    ereturn local covariates "`covariates_spec'"
    ereturn local covars "`covariates_spec'"
    ereturn local id "`idorig'"
    ereturn local time "`timeorig'"
    ereturn local clustvar "`clusterorig'"
    ereturn local datatype "panel"
    ereturn local sample_ifin ""
    ereturn local ci_method "bootstrap"
    ereturn local lead "`identified_leads'"
    ereturn local requested_lead "`requested_lead'"
    ereturn local filtered_lead ""
    ereturn local identified_lead "`identified_leads'"
    ereturn local unidentified_lead "`unidentified_leads'"
    ereturn local properties "b V"
    
    // --- Additional Matrices ---
    // Set row and column names for e(estimates)
    local est_rownames ""
    if `use_sa_kdid_path' {
        foreach l of numlist `lead' {
            local est_rownames "`est_rownames' SA_final:lead_`l'"
            forvalues kk = 1/`kmax_val' {
                local est_rownames "`est_rownames' SA_k`kk':lead_`l'"
            }
        }
    }
    else {
        foreach l of numlist `lead' {
            local est_rownames "`est_rownames' SA_dDID:lead_`l' SA_DID:lead_`l' SA_sDID:lead_`l'"
        }
    }
    local est_rownames = trim("`est_rownames'")
    matrix rownames `estimates_mat' = `est_rownames'
    if `use_sa_kdid_path' {
        matrix colnames `estimates_mat' = lead estimate std_error ci_lo ci_hi weight component_k selected_jtest selected_final dropped_jtest dropped_numerical K_init K_sel K_final
    }
    else {
        matrix colnames `estimates_mat' = lead estimate std_error ci_lo ci_hi weight
    }
    
    // Set names for e(lead_values)
    matrix colnames `lead_posted_mat' = `identified_leads'
    
    // Set names for e(weights)
    local wt_rownames ""
    foreach l of numlist `lead' {
        local wt_rownames "`wt_rownames' lead_`l'"
    }
    local wt_rownames = trim("`wt_rownames'")
    matrix rownames `weights_mat' = `wt_rownames'
    if `use_sa_kdid_path' {
        local wt_colnames ""
        forvalues kk = 1/`kmax_val' {
            local wt_colnames "`wt_colnames' w_k`kk'"
        }
        matrix colnames `weights_mat' = `wt_colnames'
    }
    else {
        matrix colnames `weights_mat' = w_did w_sdid
    }
    
    // Make a copy of estimates_mat for display
    tempname display_mat
    matrix `display_mat' = `estimates_mat'
    
    // Store additional matrices
    ereturn matrix estimates = `estimates_mat'
    ereturn matrix lead_values = `lead_posted_mat'
    ereturn matrix weights = `weights_mat'
    ereturn matrix W = `W_mat'
    ereturn matrix vcov_gmm = `vcov_gmm_mat'
    matrix rownames `bootstrap_support_mat' = `wt_rownames'
    if `use_sa_kdid_path' {
        local bs_colnames ""
        forvalues kk = 1/`kmax_val' {
            local bs_colnames "`bs_colnames' boot_k`kk'"
        }
        matrix colnames `bootstrap_support_mat' = `bs_colnames'
    }
    else {
        matrix colnames `bootstrap_support_mat' = n_valid_did n_valid_sdid n_joint_valid
    }
    ereturn matrix bootstrap_support = `bootstrap_support_mat'
    
    // Set row/column names for time_weights matrix
    local n_tw = rowsof(`time_weights_mat')
    if `n_tw' > 0 {
        local tw_rownames ""
        local tw_labels ""
        matrix `time_weight_periods_mat' = J(`n_tw', 1, .)
        quietly levelsof `time' if `touse_map', local(time_levels_used)
        forvalues i = 1/`n_tw' {
            local period_idx = el(`time_weight_period_idx_mat', `i', 1)
            local period_pos = int(`period_idx')
            local period_value : word `period_pos' of `time_levels_used'
            local period_label "`period_value'"
            local period_orig_label ""
            capture confirm variable `timeorig'
            if _rc == 0 {
                capture levelsof `timeorig' if `time' == `period_value' & `touse_map', local(period_orig_label)
                if _rc == 0 & "`period_orig_label'" != "" {
                    local period_label "`period_orig_label'"
                }
            }
            local period_stub = strtoname("time_`period_label'")
            if "`period_stub'" == "" {
                local period_stub "time_`period_pos'"
            }
            local tw_rownames "`tw_rownames' `period_stub'"
            matrix `time_weight_periods_mat'[`i', 1] = `period_value'
            if `i' == 1 {
                local tw_labels "`period_label'"
            }
            else {
                local tw_labels "`tw_labels'|`period_label'"
            }
        }
        local tw_rownames = trim("`tw_rownames'")
        matrix rownames `time_weights_mat' = `tw_rownames'
        matrix colnames `time_weights_mat' = weight
        matrix rownames `time_weight_periods_mat' = `tw_rownames'
        matrix colnames `time_weight_periods_mat' = period
        ereturn matrix time_weight_periods = `time_weight_periods_mat'
        ereturn local time_weight_labels "`tw_labels'"
        if rowsof(`time_weights_by_lead_mat') == `n_tw' {
            local tw_lead_colnames ""
            foreach l of numlist `lead' {
                local tw_lead_colnames "`tw_lead_colnames' lead_`l'"
            }
            local tw_lead_colnames = trim("`tw_lead_colnames'")
            matrix rownames `time_weights_by_lead_mat' = `tw_rownames'
            matrix colnames `time_weights_by_lead_mat' = `tw_lead_colnames'
            ereturn matrix time_weights_by_lead = `time_weights_by_lead_mat'
        }
    }
    ereturn matrix time_weights = `time_weights_mat'

    // K-DID specific matrices
    if `use_sa_kdid_path' {
        tempname sa_k_summary_mat sa_jtest_stats_mat
        mata: st_matrix("`sa_k_summary_mat'", _sa_k_summary)
        mata: st_matrix("`sa_jtest_stats_mat'", _sa_jtest_stats)
        matrix rownames `sa_k_summary_mat' = `wt_rownames'
        matrix colnames `sa_k_summary_mat' = K_init K_sel K_final
        matrix rownames `sa_jtest_stats_mat' = `wt_rownames'
        matrix colnames `sa_jtest_stats_mat' = J_stat J_df J_pval
        ereturn matrix k_summary = `sa_k_summary_mat'
        ereturn matrix jtest_stats = `sa_jtest_stats_mat'
        ereturn local moment_rule "drop-highest-order-on-rejection"
        ereturn local fallback_rule "drop-highest-order-until-invertible"
    }
    
    if `post_ncoef' < colsof(`b_mat') {
        display as text "Note: Some SA estimators are not identified for the requested lead(s)."
        display as text "      They are stored as missing in e(estimates) and omitted from e(b) and e(V)."
    }
    
    // =========================================================================
    // SECTION 8: DISPLAY RESULTS
    // =========================================================================
    _diddesign_display_header, cmd("diddesign") design("sa") ///
        datatype("Panel") n(`N') n_units(`n_units') ///
        n_periods(`n_periods') n_boot(`nboot_val') ///
        cluster("`clusterorig'") thres(`thres_val')
    
    // Display valid periods info
    display as text ""
    display as text "Valid periods:    " as result "`n_periods_valid'" ///
            as text " (threshold = `thres_val')"
    
    // Display CI method
    display as text ""
    display as text "Confidence intervals: Bootstrap percentile (`level_val'%)"
    
    // Display results table for each lead
    local row = 1
    if `use_sa_kdid_path' {
        display as text "Generalized SA-K-DID: kmax = `kmax_val'" _continue
        if `jtest_on' {
            display as text ", J-test = on"
        }
        else {
            display as text ", J-test = off"
        }
    }
    foreach l of numlist `lead' {
        // Display lead header
        display as text ""
        display as text "Results (lead = `l'):"
        display as text "{hline 78}"
        display as text %13s "Estimator" " | " %9s "Estimate" %10s "Std.Err." %20s "[`level_val'% Conf. Interval]" %9s "Weight"
        display as text "{hline 14}+{hline 64}"

        if `use_sa_kdid_path' {
            // SA K-DID path: final estimate + k components
            local est = `display_mat'[`row', 2]
            local se = `display_mat'[`row', 3]
            local ci_lo = `display_mat'[`row', 4]
            local ci_hi = `display_mat'[`row', 5]
            local k_f = `display_mat'[`row', 14]
            _diddesign_display_result, label("SA-K-DID (K=`k_f')") ///
                estimate(`est') se(`se') ci_low(`ci_lo') ci_high(`ci_hi') weight(.)
            local row = `row' + 1

            forvalues kk = 1/`kmax_val' {
                local est = `display_mat'[`row', 2]
                local se = `display_mat'[`row', 3]
                local ci_lo = `display_mat'[`row', 4]
                local ci_hi = `display_mat'[`row', 5]
                local wt = `display_mat'[`row', 6]
                _diddesign_display_result, label("SA-k`kk'") ///
                    estimate(`est') se(`se') ci_low(`ci_lo') ci_high(`ci_hi') weight(`wt')
                local row = `row' + 1
            }
        }
        else {
            // Standard K=2 path: SA-Double-DID / SA-DID / SA-sDID
            local est = `display_mat'[`row', 2]
            local se = `display_mat'[`row', 3]
            local ci_lo = `display_mat'[`row', 4]
            local ci_hi = `display_mat'[`row', 5]
            local wt = `display_mat'[`row', 6]
            _diddesign_display_result, label("SA-Double-DID") ///
                estimate(`est') se(`se') ci_low(`ci_lo') ci_high(`ci_hi') weight(`wt')
            local row = `row' + 1

            local est = `display_mat'[`row', 2]
            local se = `display_mat'[`row', 3]
            local ci_lo = `display_mat'[`row', 4]
            local ci_hi = `display_mat'[`row', 5]
            local wt = `display_mat'[`row', 6]
            _diddesign_display_result, label("SA-DID") ///
                estimate(`est') se(`se') ci_low(`ci_lo') ci_high(`ci_hi') weight(`wt')
            local row = `row' + 1

            local est = `display_mat'[`row', 2]
            local se = `display_mat'[`row', 3]
            local ci_lo = `display_mat'[`row', 4]
            local ci_hi = `display_mat'[`row', 5]
            local wt = `display_mat'[`row', 6]
            _diddesign_display_result, label("SA-sDID") ///
                estimate(`est') se(`se') ci_low(`ci_lo') ci_high(`ci_hi') weight(`wt')
            local row = `row' + 1
        }
    }
    
    display as text "{hline 78}"
    
    // Display notes
    display as text ""
    if `use_sa_kdid_path' {
        display as text "Note: SA-K-DID combines k=1,...,K components using optimal GMM weights."
        display as text "      Weight column shows GMM weights for each component."
    }
    else {
        display as text "Note: Weights sum to 1. CI computed using bootstrap quantiles."
    }
    
end