*! _rbfmvar_mata.ado — Mata engine for rbfmvar
*! Implements the RBFM-VAR estimator from:
*!   Chang, Y. (2000). Vector Autoregressions with Unknown Mixtures
*!   of I(0), I(1), and I(2) Components.
*!   Econometric Theory, 16(6), 905-926.
*!
*! Version 2.0.0, February 2026
*! Author: Dr. Merwan Roudane
*! Email: merwanroudane920@gmail.com

capture mata: mata drop rbfmvar_estimate()
capture mata: mata drop _rbfm_ecm_reparameterize()
capture mata: mata drop _rbfm_ols_var()
capture mata: mata drop _rbfm_construct_vhat()
capture mata: mata drop _rbfm_kernel_weight()
capture mata: mata drop _rbfm_kernel_lrv()
capture mata: mata drop _rbfm_kernel_olrv()
capture mata: mata drop _rbfm_andrews_bandwidth()
capture mata: mata drop _rbfm_correct()
capture mata: mata drop _rbfm_wald_test()
capture mata: mata drop _rbfm_granger_test()
capture mata: mata drop _rbfm_select_lags()
capture mata: mata drop _rbfm_irf_compute()
capture mata: mata drop _rbfm_irf_bootstrap()
capture mata: mata drop _rbfm_fevd_compute()
capture mata: mata drop _rbfm_forecast_compute()
capture mata: mata drop _rbfm_ic_table()

version 14.0
mata:
mata set matastrict on

// ========================================================================
// FUNCTION: rbfmvar_estimate()
// Master orchestrator — called from Stata, posts all results
// ========================================================================

void rbfmvar_estimate(
    string scalar varnames,
    real   scalar p_lags,
    string scalar kernel_type,
    real   scalar bw_user,
    string scalar granger_spec,
    string scalar touse_name,
    real   scalar irf_horizon,
    real   scalar irf_boot_reps,
    real   scalar irf_ci_level,
    real   scalar do_fevd,
    real   scalar forecast_steps)
{
    real matrix Y_raw, Y, Z, W, X, F_ols, E_hat, V_hat
    real matrix Omega_ev, Omega_vv, Delta_vdw
    real matrix F_plus, Gamma_plus, A_plus
    real matrix Sigma_e
    real scalar T, n, nobs, bw
    string rowvector vnames
    real colvector touse_vec

    // --- Parse variable names ---
    vnames = tokens(varnames)
    n = length(vnames)

    // --- Load data ---
    touse_vec = st_data(., touse_name)
    Y_raw = st_data(., vnames)

    // Keep only touse observations
    Y_raw = select(Y_raw, touse_vec)
    nobs = rows(Y_raw)

    // --- Lag selection if requested ---
    // (p_lags is already determined by the .ado caller)

    // --- ECM reparameterization: build Z, W, Y for Eq. 3 ---
    _rbfm_ecm_reparameterize(Y_raw, p_lags, Y, Z, W)
    T = rows(Y)
    X = (Z, W)

    // --- OLS-VAR estimation of Eq. 3 ---
    _rbfm_ols_var(Y, X, Z, W, F_ols, E_hat)

    // --- Construct v_hat (Eq. 11) ---
    _rbfm_construct_vhat(Y_raw, p_lags, F_ols, X, V_hat)

    // --- Bandwidth selection ---
    if (bw_user < 0) {
        bw = _rbfm_andrews_bandwidth(V_hat, kernel_type)
    }
    else {
        bw = bw_user
    }

    // --- Kernel LRV estimation ---
    _rbfm_kernel_lrv(E_hat, V_hat, kernel_type, bw, Omega_ev, Omega_vv)

    // --- One-sided LRV ---
    real matrix dW
    dW = W[2::T, .] - W[1::(T-1), .]
    // Pad first row with zeros
    dW = (J(1, cols(W), 0) \ dW)
    _rbfm_kernel_olrv(V_hat, dW, kernel_type, bw, Delta_vdw)

    // --- RBFM-VAR correction (Eq. 12-13) ---
    _rbfm_correct(Y, Z, W, X, E_hat, V_hat, Omega_ev, Omega_vv, Delta_vdw, T, F_plus, Gamma_plus, A_plus)

    // --- Residual covariance ---
    Sigma_e = (E_hat' * E_hat) / T

    // --- Standard errors for F⁺ ---
    // Var(vec(F̂')) = Σ̂_ε ⊗ (X'X)⁻¹  (Eq. 20)
    // SE for F⁺[i,j] = sqrt( Sigma_e[i,i] * XpXinv[j,j] )
    real matrix XpXinv_post, SE_mat
    XpXinv_post = invsym(cross(X, X))
    SE_mat = J(n, cols(X), 0)
    real scalar ii, jj
    for (ii = 1; ii <= n; ii++) {
        for (jj = 1; jj <= cols(X); jj++) {
            SE_mat[ii, jj] = sqrt(Sigma_e[ii, ii] * XpXinv_post[jj, jj])
        }
    }

    // --- Post results to Stata ---
    st_matrix("r(F_ols)", F_ols)
    st_matrix("r(F_plus)", F_plus)
    st_matrix("r(Sigma_e)", Sigma_e)
    st_matrix("r(SE_mat)", SE_mat)
    st_matrix("r(Omega_ev)", Omega_ev)
    st_matrix("r(Omega_vv)", Omega_vv)
    st_matrix("r(Delta_vdw)", Delta_vdw)
    st_numscalar("r(nobs)", nobs)
    st_numscalar("r(T_eff)", T)
    st_numscalar("r(n_vars)", n)
    st_numscalar("r(p_lags)", p_lags)
    st_numscalar("r(bandwidth)", bw)

    // --- Extract Π₁ and Π₂ matrices for display ---
    // F_ols = (Γ, A) where A = (Π₁_hat, Π₂_hat)
    // W = (Δy_{t-1}', y_{t-1}')' so A has 2n columns
    real matrix Pi1_ols, Pi2_ols, Pi1_plus, Pi2_plus
    real scalar ncols_z
    ncols_z = cols(Z)

    if (cols(W) == 2*n) {
        Pi1_ols  = F_ols[., (ncols_z+1)::(ncols_z+n)]
        Pi2_ols  = F_ols[., (ncols_z+n+1)::cols(F_ols)]
        Pi1_plus = F_plus[., (ncols_z+1)::(ncols_z+n)]
        Pi2_plus = F_plus[., (ncols_z+n+1)::cols(F_plus)]
    }
    else {
        Pi1_ols  = F_ols[., (ncols_z+1)::cols(F_ols)]
        Pi2_ols  = J(n, 0, .)
        Pi1_plus = F_plus[., (ncols_z+1)::cols(F_plus)]
        Pi2_plus = J(n, 0, .)
    }

    st_matrix("r(Pi1_ols)", Pi1_ols)
    st_matrix("r(Pi2_ols)", Pi2_ols)
    st_matrix("r(Pi1_plus)", Pi1_plus)
    st_matrix("r(Pi2_plus)", Pi2_plus)

    if (ncols_z > 0) {
        st_matrix("r(Gamma_ols)", F_ols[., 1::ncols_z])
        st_matrix("r(Gamma_plus)", F_plus[., 1::ncols_z])
    }

    // --- Granger causality test ---
    if (granger_spec != "") {
        real scalar wald_stat, wald_pval, wald_df
        _rbfm_granger_test(granger_spec, vnames, F_plus, Sigma_e, X, T, n, p_lags, ncols_z, wald_stat, wald_pval, wald_df)
        st_numscalar("r(wald_stat)", wald_stat)
        st_numscalar("r(wald_pval)", wald_pval)
        st_numscalar("r(wald_df)", wald_df)
    }

    // --- IRF if requested ---
    if (irf_horizon > 0) {
        real matrix irf_mat
        _rbfm_irf_compute(F_plus, Sigma_e, n, p_lags, ncols_z, irf_horizon, irf_mat)
        st_matrix("r(irf)", irf_mat)
        st_numscalar("r(irf_horizon)", irf_horizon)

        // --- Bootstrap IRF CI ---
        if (irf_boot_reps > 0) {
            real matrix irf_lo, irf_hi
            _rbfm_irf_bootstrap(Y_raw, p_lags, kernel_type, bw, n, ncols_z, 
                irf_horizon, irf_boot_reps, irf_ci_level, irf_lo, irf_hi)
            st_matrix("r(irf_lo)", irf_lo)
            st_matrix("r(irf_hi)", irf_hi)
            st_numscalar("r(irf_ci_level)", irf_ci_level)
            st_numscalar("r(irf_boot_reps)", irf_boot_reps)
        }
    }

    // --- FEVD if requested ---
    if (do_fevd > 0 & irf_horizon > 0) {
        real matrix fevd_mat
        _rbfm_fevd_compute(F_plus, Sigma_e, n, p_lags, ncols_z, irf_horizon, fevd_mat)
        st_matrix("r(fevd)", fevd_mat)
    }

    // --- Forecast if requested ---
    if (forecast_steps > 0) {
        real matrix fcast_mat, fcast_se
        _rbfm_forecast_compute(Y_raw, F_plus, Sigma_e, n, p_lags, ncols_z, 
            forecast_steps, fcast_mat, fcast_se)
        st_matrix("r(forecast)", fcast_mat)
        st_matrix("r(forecast_se)", fcast_se)
        st_numscalar("r(forecast_steps)", forecast_steps)
    }

    // --- Store residuals for density plot ---
    st_matrix("r(residuals)", E_hat)
}


// ========================================================================
// FUNCTION: _rbfm_ecm_reparameterize()
// Reparameterize levels VAR into Eq. 3 form (Chang 2000, p.907)
//   y_t = Γ z_t + A w_t + ε_t
// Dependent: y_t (LEVELS, not differences)
// z_t = (Δ²y_{t-1},...,Δ²y_{t-p+2})' — stationary regressors
// w_t = (Δy_{t-1}', y_{t-1}')' — nonstationary regressors
// ========================================================================

void _rbfm_ecm_reparameterize(
    real matrix Y_raw,
    real scalar p,
    real matrix Y,
    real matrix Z,
    real matrix W)
{
    real scalar nobs, n, T, t, j, p_eff
    real matrix dY, d2Y

    nobs = rows(Y_raw)
    n    = cols(Y_raw)

    // First differences
    dY = Y_raw[2::nobs, .] - Y_raw[1::(nobs-1), .]

    // Second differences
    d2Y = dY[2::rows(dY), .] - dY[1::(rows(dY)-1), .]

    // Effective sample start: need Δy_{t-1} which requires dY row ≥ 1
    // dY row k = Δy at original time k+1
    // Δy_{t-1} at t_orig = p_eff + t → dY row p_eff + t - 2
    // For t=1: p_eff + 1 - 2 = p_eff - 1 ≥ 1 requires p_eff ≥ 2
    p_eff = max((p, 2))

    T = nobs - p_eff
    if (T < 10) {
        errprintf("Insufficient observations (T=%g, need at least 10)\n", T)
        exit(2001)
    }

    // Dependent variable: y_t (LEVELS per Eq. 3)
    Y = Y_raw[(p_eff+1)::nobs, .]

    // Build Z = (Δ²y_{t-1}, ..., Δ²y_{t-p+2})  [stationary regressors]
    if (p >= 3) {
        Z = J(T, n*(p-2), 0)
        for (t = 1; t <= T; t++) {
            // Original time for row t: t_orig = p_eff + t
            for (j = 1; j <= p-2; j++) {
                // Δ²y_{t-j}: d2Y row (t_orig - j - 2) = p_eff + t - j - 2
                real scalar d2_idx
                d2_idx = p_eff + t - j - 2
                if (d2_idx >= 1 & d2_idx <= rows(d2Y)) {
                    Z[t, (n*(j-1)+1)::(n*j)] = d2Y[d2_idx, .]
                }
            }
        }
    }
    else {
        Z = J(T, 0, .)
    }

    // Build W = (Δy_{t-1}', y_{t-1}')  [nonstationary regressors]
    real matrix dY_lag1, Y_lag1

    dY_lag1 = J(T, n, 0)
    Y_lag1  = J(T, n, 0)

    for (t = 1; t <= T; t++) {
        // Original time for row t of Y: t_orig = p_eff + t
        // dY row k corresponds to Δy at original time k+1
        // Δy_{t-1} at original time p_eff+t-1 → dY row p_eff+t-2
        dY_lag1[t, .] = dY[p_eff + t - 2, .]
        // y_{t-1}: Y_raw row t_orig - 1 = p_eff + t - 1
        Y_lag1[t, .] = Y_raw[p_eff + t - 1, .]
    }

    W = (dY_lag1, Y_lag1)
}


// ========================================================================
// FUNCTION: _rbfm_ols_var()
// OLS estimation of the reparameterized model (Eq. 3)
// ========================================================================

void _rbfm_ols_var(
    real matrix Y,
    real matrix X,
    real matrix Z,
    real matrix W,
    real matrix F_ols,
    real matrix E_hat)
{
    real matrix XpX, XpXinv

    XpX = cross(X, X)
    XpXinv = invsym(XpX)
    F_ols = (Y' * X) * XpXinv
    E_hat = Y - X * F_ols'
}


// ========================================================================
// FUNCTION: _rbfm_construct_vhat()
// Construct v_hat_t per Eq. 11
// v_hat_t = (Δ²y_{t-1}, Δy_{t-1} - N_hat * Δy_{t-2})'
// N_hat is OLS from: Δy_{t-1} = N * Δy_{t-2} + error
// ========================================================================

void _rbfm_construct_vhat(
    real matrix Y_raw,
    real scalar p,
    real matrix F_ols,
    real matrix X,
    real matrix V_hat)
{
    real scalar nobs, n, T, p_eff
    real matrix dY, d2Y, dY_lag1, dY_lag2, N_hat
    real matrix v1, v2
    real scalar t

    nobs = rows(Y_raw)
    n    = cols(Y_raw)
    p_eff = max((p, 2))

    // Differences
    dY  = Y_raw[2::nobs, .] - Y_raw[1::(nobs-1), .]
    d2Y = dY[2::rows(dY), .] - dY[1::(rows(dY)-1), .]

    T = rows(X)

    // Build Δy_{t-1} and Δy_{t-2} aligned to effective sample
    dY_lag1 = J(T, n, 0)
    dY_lag2 = J(T, n, 0)

    for (t = 1; t <= T; t++) {
        dY_lag1[t, .] = dY[p_eff + t - 2, .]
        if (p_eff + t - 3 >= 1) {
            dY_lag2[t, .] = dY[p_eff + t - 3, .]
        }
    }

    // OLS: Δy_{t-1} = N * Δy_{t-2} + error
    N_hat = (dY_lag1' * dY_lag2) * invsym(cross(dY_lag2, dY_lag2))

    // v1 = Δ²y_{t-1}
    v1 = J(T, n, 0)
    for (t = 1; t <= T; t++) {
        if (p_eff + t - 3 >= 1) {
            v1[t, .] = d2Y[p_eff + t - 3, .]
        }
    }

    // v2 = Δy_{t-1} - N_hat * Δy_{t-2}
    v2 = dY_lag1 - dY_lag2 * N_hat'

    // V_hat = (v1, v2) — each T x n, combined T x 2n
    V_hat = (v1, v2)
}


// ========================================================================
// FUNCTION: _rbfm_kernel_weight()
// Kernel weight function
// ========================================================================

real scalar _rbfm_kernel_weight(real scalar x, string scalar kernel)
{
    real scalar w, z

    if (kernel == "bartlett") {
        w = (abs(x) <= 1) ? (1 - abs(x)) : 0
    }
    else if (kernel == "parzen") {
        z = abs(x)
        if (z <= 0.5) {
            w = 1 - 6*z^2 + 6*z^3
        }
        else if (z <= 1) {
            w = 2*(1 - z)^3
        }
        else {
            w = 0
        }
    }
    else {
        // Quadratic Spectral (QS)
        if (x == 0) {
            w = 1
        }
        else {
            z = 6 * pi() * x / 5
            w = (25 / (12 * pi()^2 * x^2)) * (sin(z)/z - cos(z))
        }
    }

    return(w)
}


// ========================================================================
// FUNCTION: _rbfm_andrews_bandwidth()
// Andrews (1991) automatic bandwidth selection
// ========================================================================

real scalar _rbfm_andrews_bandwidth(
    real matrix V_hat,
    string scalar kernel)
{
    real scalar T, n_v, i, rho_hat, alpha_hat, bw
    real scalar num, den
    real matrix v_i
    real colvector rho_vec

    T   = rows(V_hat)
    n_v = cols(V_hat)

    // Estimate AR(1) coefficients for each component of v_hat
    num = 0
    den = 0

    for (i = 1; i <= n_v; i++) {
        v_i = V_hat[., i]
        rho_hat = (v_i[1::(T-1)]' * v_i[2::T]) / (v_i[1::(T-1)]' * v_i[1::(T-1)])
        if (abs(rho_hat) >= 1) rho_hat = 0.97 * sign(rho_hat)

        if (kernel == "bartlett") {
            num = num + 4 * rho_hat^2 / ((1 - rho_hat)^2 * (1 + rho_hat)^2)
            den = den + 1
        }
        else if (kernel == "parzen") {
            num = num + 4 * rho_hat^2 / ((1 - rho_hat)^4 * (1 + rho_hat)^4)
            den = den + 1
        }
        else {
            // QS kernel
            num = num + 4 * rho_hat^2 / ((1 - rho_hat)^4 * (1 + rho_hat)^4)
            den = den + 1
        }
    }

    alpha_hat = num / den

    if (kernel == "bartlett") {
        bw = 1.1447 * (alpha_hat * T)^(1/3)
    }
    else if (kernel == "parzen") {
        bw = 2.6614 * (alpha_hat * T)^(1/5)
    }
    else {
        // QS kernel
        bw = 1.3221 * (alpha_hat * T)^(1/5)
    }

    bw = max((bw, 1))
    bw = min((bw, T/3))

    return(bw)
}


// ========================================================================
// FUNCTION: _rbfm_kernel_lrv()
// Kernel estimates of long-run covariance matrices:
//   Omega_ev = lrvar(epsilon, v)
//   Omega_vv = lrvar(v, v)
// ========================================================================

void _rbfm_kernel_lrv(
    real matrix E_hat,
    real matrix V_hat,
    string scalar kernel,
    real scalar bw,
    real matrix Omega_ev,
    real matrix Omega_vv)
{
    real scalar T, j
    real scalar w_j
    real matrix Gamma_ev_j, Gamma_vv_j

    T = rows(E_hat)

    // Lag 0
    Omega_ev = (E_hat' * V_hat) / T
    Omega_vv = (V_hat' * V_hat) / T

    // Lags 1 to bw (or until kernel weight is negligible)
    real scalar max_lag
    if (kernel == "bartlett" | kernel == "parzen") {
        max_lag = floor(bw)
    }
    else {
        max_lag = min((floor(3 * bw), T - 1))
    }

    for (j = 1; j <= max_lag; j++) {
        w_j = _rbfm_kernel_weight(j / bw, kernel)
        if (abs(w_j) < 1e-12) continue

        // Γ_εv(j) = (1/T) Σ ε_{t+j} v_t'
        Gamma_ev_j = (E_hat[(j+1)::T, .]' * V_hat[1::(T-j), .]) / T
        // Γ_εv(-j) = (1/T) Σ ε_t v_{t+j}' = Γ_vε(j)'
        real matrix Gamma_ev_neg_j
        Gamma_ev_neg_j = (V_hat[(j+1)::T, .]' * E_hat[1::(T-j), .])' / T

        Gamma_vv_j = (V_hat[(j+1)::T, .]' * V_hat[1::(T-j), .]) / T

        // Ω_εv = Σ w(j/K) [Γ_εv(j) + Γ_εv(-j)] — correct two-sided sum
        Omega_ev = Omega_ev + w_j * (Gamma_ev_j + Gamma_ev_neg_j)
        // Ω_vv: Γ_vv(-j) = Γ_vv(j)' (same subscripts), so transpose is correct
        Omega_vv = Omega_vv + w_j * (Gamma_vv_j + Gamma_vv_j')
    }
}


// ========================================================================
// FUNCTION: _rbfm_kernel_olrv()
// One-sided long-run covariance: Delta_vdw = sum_{j=0}^{K} w(j/K) * Gamma_v,dw(j)
// This is the one-sided (forward) LRV
// ========================================================================

void _rbfm_kernel_olrv(
    real matrix V_hat,
    real matrix dW,
    string scalar kernel,
    real scalar bw,
    real matrix Delta_vdw)
{
    real scalar T, j, max_lag
    real scalar w_j
    real matrix Gamma_j

    T = rows(V_hat)

    // Lag 0
    Delta_vdw = (V_hat' * dW) / T

    if (kernel == "bartlett" | kernel == "parzen") {
        max_lag = floor(bw)
    }
    else {
        max_lag = min((floor(3 * bw), T - 1))
    }

    for (j = 1; j <= max_lag; j++) {
        w_j = _rbfm_kernel_weight(j / bw, kernel)
        if (abs(w_j) < 1e-12) continue

        // One-sided LRV: Δ̂_vΔw = Σ_{j≥0} w(j/K) · (1/T) Σ_t v̂_t · Δw_{t+j}'
        // v̂ at time t aligned with Δw at time t+j (forward)
        Gamma_j = (V_hat[1::(T-j), .]' * dW[(j+1)::T, .]) / T
        Delta_vdw = Delta_vdw + w_j * Gamma_j
    }
}


// ========================================================================
// FUNCTION: _rbfm_correct()
// Apply RBFM-VAR correction per Eq. 12-13
// F⁺ = (Y'Z, Y⁺'W - T·Â⁺)(X'X)⁻¹
//   Y⁺ = Y' - Ω̂_εv · Ω̂_vv⁻¹ · V'
//   Â⁺ = (0,I) · Δ̂_vΔw  (serial correlation)
// ========================================================================

void _rbfm_correct(
    real matrix Y,
    real matrix Z,
    real matrix W,
    real matrix X,
    real matrix E_hat,
    real matrix V_hat,
    real matrix Omega_ev,
    real matrix Omega_vv,
    real matrix Delta_vdw,
    real scalar T,
    real matrix F_plus,
    real matrix Gamma_plus,
    real matrix A_plus)
{
    real matrix Omega_vv_inv, Y_plus_t, XpXinv
    real matrix YpW_corrected
    real matrix A_correction
    real scalar n, ncols_w

    n = cols(Y)
    ncols_w = cols(W)

    // Ω̂_vv⁻¹ using Moore-Penrose (may be singular per paper)
    Omega_vv_inv = pinv(Omega_vv)

    // Y⁺' = Y' - Ω̂_εv · Ω̂_vv⁻¹ · V'
    // => Y⁺ = Y - V * (Ω̂_vv⁻¹' * Ω̂_εv')
    Y_plus_t = Y - V_hat * (Omega_vv_inv' * Omega_ev')

    // Â⁺ = (0_n x n*(p-2), I_n ⊗ [0,I]) · Δ̂_vΔw
    // Simpler: Â⁺ is n x 2n, constructed from one-sided LRV
    // The serial correction term relates v̂ to Δw
    // (0,I) selects the Δw part
    // Δ̂_vΔw is (2n x 2n) — picking n-to-2n rows gives the v₂ component
    if (ncols_w == 2*n) {
        A_correction = Delta_vdw[(n+1)::(2*n), .]
    }
    else {
        A_correction = Delta_vdw
    }

    // Y⁺'W corrected
    YpW_corrected = Y_plus_t' * W - T * A_correction

    // X'X inverse
    XpXinv = invsym(cross(X, X))

    // F⁺ = (Y'Z, Y⁺'W - T·Â⁺)(X'X)⁻¹
    // Note: Z part uses ORIGINAL Y (not corrected), per Eq. 12
    if (cols(Z) > 0) {
        F_plus = ((Y' * Z), YpW_corrected) * XpXinv
    }
    else {
        F_plus = YpW_corrected * XpXinv
    }

    // Extract components
    if (cols(Z) > 0) {
        Gamma_plus = F_plus[., 1::cols(Z)]
        A_plus     = F_plus[., (cols(Z)+1)::cols(F_plus)]
    }
    else {
        Gamma_plus = J(n, 0, .)
        A_plus     = F_plus
    }
}


// ========================================================================
// FUNCTION: _rbfm_wald_test()
// Modified Wald test W_F⁺ per Eq. 20 of Chang (2000)
// W_F⁺ = (R vec(F⁺) - r)' [R (Σ̂_ε ⊗ (X'X)⁻¹) R']⁻¹ (R vec(F⁺) - r)
// Σ̂_ε = E'E/T (usual estimator), so Var(vec(F̂)) = Σ̂_ε ⊗ (X'X)⁻¹
// Conservative p-value from χ²_q bound (Theorem 2)
// ========================================================================

void _rbfm_wald_test(
    real matrix R,
    real colvector r_vec,
    real matrix F_plus,
    real matrix Sigma_e,
    real matrix X,
    real scalar T,
    real scalar wald_stat,
    real scalar wald_pval,
    real scalar wald_df)
{
    real colvector vecF, Rf_r
    real matrix XpXinv, V_mat, V_inv

    vecF = vec(F_plus')
    Rf_r = R * vecF - r_vec

    XpXinv = invsym(cross(X, X))

    // Variance: Var(vec(F̂)) = Σ̂_ε ⊗ (X'X)⁻¹
    // where Σ̂_ε = E'E/T is the usual covariance estimator
    V_mat = R * (Sigma_e # XpXinv) * R'
    V_inv = invsym(V_mat)

    // Standard Wald: no extra T since Σ̂_ε already normalized by T
    wald_stat = Rf_r' * V_inv * Rf_r
    wald_df   = rows(R)
    wald_pval = 1 - chi2(wald_df, wald_stat)
    if (wald_pval < 0) wald_pval = 0
}


// ========================================================================
// FUNCTION: _rbfm_granger_test()
// Build R matrix for Granger non-causality and call Wald test
// Spec format: "y1 -> y2" meaning "test H₀: y1 does NOT Granger-cause y2"
// ========================================================================

void _rbfm_granger_test(
    string scalar granger_spec,
    string rowvector vnames,
    real matrix F_plus,
    real matrix Sigma_e,
    real matrix X,
    real scalar T,
    real scalar n,
    real scalar p,
    real scalar ncols_z,
    real scalar wald_stat,
    real scalar wald_pval,
    real scalar wald_df)
{
    string rowvector parts
    string scalar cause_var, effect_var
    real scalar i_cause, i_effect, i, found
    real matrix R
    real colvector r_vec
    real scalar np_total, j, row_idx

    // Parse "cause -> effect"
    parts = tokens(granger_spec)
    if (length(parts) < 3) {
        errprintf("granger() format: 'varname -> varname'\n")
        exit(198)
    }
    cause_var  = parts[1]
    effect_var = parts[3]

    // Find variable indices
    i_cause = 0
    i_effect = 0
    for (i = 1; i <= n; i++) {
        if (vnames[i] == cause_var)  i_cause  = i
        if (vnames[i] == effect_var) i_effect = i
    }

    if (i_cause == 0) {
        errprintf("Variable '%s' not found in model\n", cause_var)
        exit(198)
    }
    if (i_effect == 0) {
        errprintf("Variable '%s' not found in model\n", effect_var)
        exit(198)
    }

    // Build restriction matrix R for H₀: coefficients of cause_var in effect_var equation = 0
    // F⁺ is n x np_total (n equations × np_total regressors)
    // vecF = vec(F⁺') stacks COLUMNS of F⁺' = stacks ROWS of F⁺
    //   F⁺' is (np_total × n), so vec(F⁺') has n*np_total elements
    //   Position of F⁺[eq, reg] in vec(F⁺') = (eq-1)*np_total + reg
    //
    // We need to set to zero: for the effect equation (row i_effect of F⁺),
    // ALL columns corresponding to cause_var in Γ (Δ²y lags), Π₁, and Π₂

    np_total = cols(X)  // = ncols_z + 2*n = n*(p-2) + 2*n = n*p

    // Count restrictions:
    // - In Γ: (p-2) lags of Δ²y, each has cause_var column → (p-2) restrictions
    // - In Π₁ (Δy_{t-1}): cause_var column → 1 restriction
    // - In Π₂ (y_{t-1}): cause_var column → 1 restriction
    // Total = p restrictions

    real scalar n_restrictions, k
    n_restrictions = max((p - 2, 0)) + 2  // (p-2) from Γ (if p≥3) + 1 from Π₁ + 1 from Π₂

    R = J(n_restrictions, n * np_total, 0)
    r_vec = J(n_restrictions, 1, 0)

    // vec(F⁺') maps: element F⁺[eq, reg] -> position (eq-1)*np_total + reg
    row_idx = 0

    // Restrictions on Γ: Δ²y_{t-j} blocks for j = 1,...,p-2
    // In F⁺, Γ occupies columns 1 to ncols_z = n*(p-2)
    // Block j of Γ: columns n*(j-1)+1 to n*j contain Δ²y_{t-j}
    // Within each block, cause_var is at column n*(j-1) + i_cause
    for (k = 1; k <= p - 2; k++) {
        row_idx = row_idx + 1
        j = n * (k - 1) + i_cause
        R[row_idx, (i_effect-1)*np_total + j] = 1
    }

    // Restriction on Π₁: F⁺[i_effect, ncols_z + i_cause] = 0
    row_idx = row_idx + 1
    j = ncols_z + i_cause
    R[row_idx, (i_effect-1)*np_total + j] = 1

    // Restriction on Π₂: F⁺[i_effect, ncols_z + n + i_cause] = 0
    row_idx = row_idx + 1
    j = ncols_z + n + i_cause
    R[row_idx, (i_effect-1)*np_total + j] = 1

    // Call Wald test
    _rbfm_wald_test(R, r_vec, F_plus, Sigma_e, X, T, wald_stat, wald_pval, wald_df)
}


// ========================================================================
// FUNCTION: _rbfm_select_lags()
// Information criterion lag selection for VAR order
// ========================================================================

real scalar _rbfm_select_lags(
    real matrix Y_raw,
    real scalar max_p,
    string scalar ic_type)
{
    real scalar n, nobs, p, T, best_p
    real scalar ic_val, best_ic, logdet_sigma
    real matrix Y, Z, W, X, F_tmp, E_tmp, Sigma

    n    = cols(Y_raw)
    nobs = rows(Y_raw)
    best_ic = .
    best_p  = 1

    for (p = 1; p <= max_p; p++) {
        T = nobs - 2 - p + 2
        if (T < 3*n) continue

        _rbfm_ecm_reparameterize(Y_raw, p, Y, Z, W)
        T = rows(Y)
        X = (Z, W)

        _rbfm_ols_var(Y, X, Z, W, F_tmp, E_tmp)

        Sigma = (E_tmp' * E_tmp) / T
        logdet_sigma = ln(det(Sigma))

        if (ic_type == "aic") {
            ic_val = logdet_sigma + 2 * n^2 * p / T
        }
        else if (ic_type == "bic") {
            ic_val = logdet_sigma + n^2 * p * ln(T) / T
        }
        else {
            // HQ
            ic_val = logdet_sigma + 2 * n^2 * p * ln(ln(T)) / T
        }

        if (ic_val < best_ic | best_ic == .) {
            best_ic = ic_val
            best_p  = p
        }
    }

    return(best_p)
}


// ========================================================================
// FUNCTION: _rbfm_irf_compute()
// Impulse Response Functions from the RBFM-VAR estimates
// Uses Cholesky decomposition of Sigma for orthogonalized IRFs
// ========================================================================

void _rbfm_irf_compute(
    real matrix F_plus,
    real matrix Sigma_e,
    real scalar n,
    real scalar p,
    real scalar ncols_z,
    real scalar horizon,
    real matrix irf_mat)
{
    real matrix A_companion, J_mat, Phi_h, P_chol
    real matrix irf_h
    real scalar h, total_np, comp_dim
    real matrix Pi1, Pi2, Gamma_est
    real scalar i, k

    // Extract Γ (stationary), Π₁, and Π₂
    if (ncols_z > 0) {
        Gamma_est = F_plus[., 1::ncols_z]
    }
    Pi1 = F_plus[., (ncols_z+1)::(ncols_z+n)]
    Pi2 = F_plus[., (ncols_z+n+1)::cols(F_plus)]

    // Reconstruct levels VAR A₁,...,A_p from ECM parameters
    // The ECM form (Eq. 3) relates to the levels VAR (Eq. 1) via:
    //   Φ = Γ with Φ_k = Σ_{j=k}^{p} (j-k+1)·A_j  for k = 1,...,p-2
    //   Π₁ = -Σ_{k=2}^{p} (k-1)·A_k,  Π₂ = Σ_{k=1}^{p} A_k
    //
    // For p=1: y_t = Π₁·Δy_{t-1} + Π₂·y_{t-1} + ε_t
    //        = (Π₁+Π₂)·y_{t-1} - Π₁·y_{t-2} + ε_t  → VAR(2) in levels
    //   A₁ = Π₁ + Π₂,  A₂ = -Π₁
    //
    // For p=2: y_t = Γ₁·Δ²y_{t-1} + Π₁·Δy_{t-1} + Π₂·y_{t-1} (no Γ, ncols_z=0 here)
    //   Actually p=2 ⟹ ncols_z = 0, same as p=1 reconstruction
    //   Still: A₁ = Π₁ + Π₂,  A₂ = -Π₁
    //
    // General: recover A matrices from the definitions on p.908.
    //   A_p = (-1)^(p-1) component from the recursion.
    //   We use the cleaner approach: build the p-th order levels VAR companion.

    // For any p, the ECM Eq.3 can be inverted:
    //   y_t = (Π₁+Π₂+I)·y_{t-1} + Σ Γ contributions from higher lags
    // The most reliable approach:
    //   From Eq. 3: y_t = Γ·z_t + Π₁·Δy_{t-1} + Π₂·y_{t-1} + ε_t
    //   Substituting z_t and Δy back into levels form:
    //   The companion form always ends up being p+1 dimensional in levels.

    // Direct reconstruction for the general case:
    // The ECM form with Δ²y lags gives a levels VAR of order p+1:
    //   y_t = A₁·y_{t-1} + A₂·y_{t-2} + ... + A_{p+1}·y_{t-(p+1)} + ε_t
    // We can derive A matrices by collecting terms.
    //
    // Simpler: note that ECM Eq. 3 is:
    //   y_t = [Γ₁,...,Γ_{p-2}] · [Δ²y_{t-1},...,Δ²y_{t-p+2}]' + Π₁·Δy_{t-1} + Π₂·y_{t-1} + ε_t
    //
    // Expanding Δ²y_{t-j} = y_{t-j} - 2·y_{t-j-1} + y_{t-j-2} and Δy_{t-1} = y_{t-1} - y_{t-2}:
    //   y_t = Σ_{j=1}^{p-2} Γ_j·(y_{t-j} - 2·y_{t-j-1} + y_{t-j-2}) + Π₁·(y_{t-1} - y_{t-2}) + Π₂·y_{t-1} + ε_t
    //
    // Collecting coefficients on y_{t-1},...,y_{t-p}:

    real scalar p_levels
    p_levels = p  // The levels VAR order: ECM with p lags → levels VAR(p) in the Chang setup
                  // Actually: the highest lag in y is max(1 from Δy, p-2+2 from Δ²y lags) = p
                  // But Δ²y_{t-j} = y_{t-j} - 2y_{t-j-1} + y_{t-j-2} reaches y_{t-p+2-2} = y_{t-p}
                  // And Δy_{t-1} = y_{t-1} - y_{t-2}
                  // So the highest lag of y is max(p, 2) = p (since p ≥ 1)

    // Build A matrices: A_k is the coefficient on y_{t-k} in the levels VAR
    real matrix A_levs
    A_levs = J(n, n * p_levels, 0)  // A_levs = (A₁, A₂, ..., A_p)

    // Contributions from Π₂·y_{t-1}
    A_levs[., 1::n] = A_levs[., 1::n] + Pi2

    // Contributions from Π₁·Δy_{t-1} = Π₁·y_{t-1} - Π₁·y_{t-2}
    A_levs[., 1::n] = A_levs[., 1::n] + Pi1
    if (p_levels >= 2) {
        A_levs[., (n+1)::(2*n)] = A_levs[., (n+1)::(2*n)] - Pi1
    }

    // Contributions from Γ_j·Δ²y_{t-j} for j = 1,...,p-2
    // Δ²y_{t-j} = y_{t-j} - 2·y_{t-j-1} + y_{t-j-2}
    // → coefficient on y_{t-j}: +Γ_j
    // → coefficient on y_{t-j-1}: -2·Γ_j
    // → coefficient on y_{t-j-2}: +Γ_j
    for (k = 1; k <= p - 2; k++) {
        real matrix Gamma_k
        Gamma_k = Gamma_est[., (n*(k-1)+1)::(n*k)]

        // y_{t-k}: +Γ_k
        A_levs[., (n*(k-1)+1)::(n*k)] = A_levs[., (n*(k-1)+1)::(n*k)] + Gamma_k

        // y_{t-k-1}: -2·Γ_k
        if (k + 1 <= p_levels) {
            A_levs[., (n*k+1)::(n*(k+1))] = A_levs[., (n*k+1)::(n*(k+1))] - 2 * Gamma_k
        }

        // y_{t-k-2}: +Γ_k
        if (k + 2 <= p_levels) {
            A_levs[., (n*(k+1)+1)::(n*(k+2))] = A_levs[., (n*(k+1)+1)::(n*(k+2))] + Gamma_k
        }
    }

    // Build companion matrix of dimension p_levels * n
    comp_dim = p_levels * n
    A_companion = J(comp_dim, comp_dim, 0)

    // First n rows: (A₁, A₂, ..., A_p)
    A_companion[1::n, .] = A_levs

    // Identity blocks below
    if (p_levels >= 2) {
        A_companion[(n+1)::comp_dim, 1::((p_levels-1)*n)] = I((p_levels-1)*n)
    }

    // Cholesky of Sigma
    P_chol = cholesky(Sigma_e)

    // Selection matrix: picks first n rows
    J_mat = (I(n), J(n, comp_dim - n, 0))

    // IRF: Φ_h = J * A^h * J' * P
    irf_mat = J(n*n, horizon+1, 0)

    for (h = 0; h <= horizon; h++) {
        if (h == 0) {
            Phi_h = I(comp_dim)
        }
        else {
            Phi_h = A_companion
            for (i = 2; i <= h; i++) {
                Phi_h = Phi_h * A_companion
            }
        }
        irf_h = J_mat * Phi_h * J_mat' * P_chol
        irf_mat[., h+1] = vec(irf_h)
    }
}


// ========================================================================
// FUNCTION: _rbfm_irf_bootstrap()
// Residual-based bootstrap for IRF confidence intervals
// Uses centered residuals, resamples, reconstructs data, re-estimates
// ========================================================================

void _rbfm_irf_bootstrap(
    real matrix Y_raw,
    real scalar p_lags,
    string scalar kernel_type,
    real scalar bw,
    real scalar n,
    real scalar ncols_z,
    real scalar horizon,
    real scalar B,
    real scalar ci_level,
    real matrix irf_lo,
    real matrix irf_hi)
{
    real scalar b, T, nobs, h, idx
    real matrix Y, Z, W, X, F_ols, E_hat, V_hat
    real matrix Omega_ev, Omega_vv, Delta_vdw, dW
    real matrix F_plus_b, Gamma_plus_b, A_plus_b, Sigma_e_b
    real matrix irf_mat_b, irf_store
    real matrix E_centered, Y_boot
    real scalar p_eff, alpha_lo, alpha_hi
    real colvector boot_idx
    real scalar n_irf_elem, b_ok

    // ECM reparameterization of original data
    _rbfm_ecm_reparameterize(Y_raw, p_lags, Y, Z, W)
    T = rows(Y)
    X = (Z, W)
    _rbfm_ols_var(Y, X, Z, W, F_ols, E_hat)

    // Center residuals
    E_centered = E_hat :- mean(E_hat)

    nobs  = rows(Y_raw)
    p_eff = max((p_lags, 2))

    // Storage: B rows x n_irf_elem cols (one bootstrap draw per row)
    n_irf_elem = n * n * (horizon + 1)
    irf_store  = J(B, n_irf_elem, .)

    alpha_lo = (100 - ci_level) / 2 / 100
    alpha_hi = 1 - alpha_lo

    b_ok = 0

    for (b = 1; b <= B; b++) {
        // Resample residuals with replacement
        boot_idx = ceil(T :* runiform(T, 1))
        boot_idx = rowmax((boot_idx, J(T, 1, 1)))
        boot_idx = rowmin((boot_idx, J(T, 1, T)))

        real matrix E_boot
        E_boot = E_centered[boot_idx, .]

        // Reconstruct Y_boot from fitted values + bootstrap residuals
        // Y_boot = X * F_ols' + E_boot (in the ECM representation)
        real matrix Y_boot_ecm
        Y_boot_ecm = X * F_ols' + E_boot

        // Rebuild full Y_raw_boot from Y_boot_ecm
        // Use original initial values + reconstructed levels
        Y_boot = Y_raw[1::p_eff, .]
        for (h = 1; h <= T; h++) {
            Y_boot = Y_boot \ Y_boot_ecm[h, .]
        }

        // Re-estimate RBFM-VAR on bootstrap data
        real matrix Yb, Zb, Wb, Xb, F_ols_b, E_hat_b, V_hat_b
        real matrix Omega_ev_b, Omega_vv_b, Delta_vdw_b, dWb
        real scalar Tb, ncols_z_b

        _rbfm_ecm_reparameterize(Y_boot, p_lags, Yb, Zb, Wb)
        Tb = rows(Yb)
        if (Tb < 5) continue

        Xb = (Zb, Wb)
        _rbfm_ols_var(Yb, Xb, Zb, Wb, F_ols_b, E_hat_b)
        _rbfm_construct_vhat(Y_boot, p_lags, F_ols_b, Xb, V_hat_b)

        // Guard: row counts must match before kernel LRV
        if (rows(E_hat_b) != Tb | rows(V_hat_b) != Tb) continue

        _rbfm_kernel_lrv(E_hat_b, V_hat_b, kernel_type, bw, Omega_ev_b, Omega_vv_b)

        dWb = Wb[2::Tb, .] - Wb[1::(Tb-1), .]
        dWb = (J(1, cols(Wb), 0) \ dWb)
        _rbfm_kernel_olrv(V_hat_b, dWb, kernel_type, bw, Delta_vdw_b)

        _rbfm_correct(Yb, Zb, Wb, Xb, E_hat_b, V_hat_b,
            Omega_ev_b, Omega_vv_b, Delta_vdw_b, Tb,
            F_plus_b, Gamma_plus_b, A_plus_b)

        Sigma_e_b = (E_hat_b' * E_hat_b) / Tb

        // Guard: Sigma must be positive definite for Cholesky in irf_compute
        if (det(Sigma_e_b) <= 0) continue

        // Compute IRFs from bootstrap estimates
        ncols_z_b = cols(Zb)
        _rbfm_irf_compute(F_plus_b, Sigma_e_b, n, p_lags, ncols_z_b, horizon, irf_mat_b)

        // Guard: irf_mat_b must have correct dimensions
        if (rows(irf_mat_b) != n*n | cols(irf_mat_b) != horizon+1) continue

        // Store as a row: vec(irf_mat_b)' is 1 x n_irf_elem
        b_ok = b_ok + 1
        irf_store[b_ok, .] = vec(irf_mat_b)'
    }

    // Trim to successful draws
    if (b_ok < 2) {
        irf_lo = J(n*n, horizon+1, 0)
        irf_hi = J(n*n, horizon+1, 0)
        return
    }
    irf_store = irf_store[1::b_ok, .]

    // Compute percentile-based CI
    irf_lo = J(n*n, horizon+1, 0)
    irf_hi = J(n*n, horizon+1, 0)

    real scalar lo_idx, hi_idx, irf_row, irf_col
    real colvector sorted_col

    lo_idx = max((1, ceil(b_ok * alpha_lo)))
    hi_idx = min((b_ok, floor(b_ok * alpha_hi)))
    if (hi_idx < lo_idx) hi_idx = b_ok

    for (idx = 1; idx <= n_irf_elem; idx++) {
        // Sort this element across bootstrap draws (column of irf_store)
        sorted_col = sort(irf_store[., idx], 1)

        // Map idx to (row, col) in irf_mat
        irf_col = ceil(idx / (n*n))
        irf_row = idx - (irf_col - 1) * (n*n)

        irf_lo[irf_row, irf_col] = sorted_col[lo_idx]
        irf_hi[irf_row, irf_col] = sorted_col[hi_idx]
    }
}


// ========================================================================
// FUNCTION: _rbfm_fevd_compute()
// Forecast Error Variance Decomposition
// FEVD_h(i,j) = Σ_{s=0}^{h} [Φ_s·P]²_{ij} / Σ_{s=0}^{h} Σ_k [Φ_s·P]²_{ik}
// ========================================================================

void _rbfm_fevd_compute(
    real matrix F_plus,
    real matrix Sigma_e,
    real scalar n,
    real scalar p,
    real scalar ncols_z,
    real scalar horizon,
    real matrix fevd_mat)
{
    // First compute all IRF matrices (Cholesky-orthogonalized)
    real matrix irf_mat
    _rbfm_irf_compute(F_plus, Sigma_e, n, p, ncols_z, horizon, irf_mat)

    // fevd_mat: n*n rows x (horizon+1) cols
    // fevd_mat[(i-1)*n + j, h+1] = proportion of var i's FEV due to shock j at horizon h
    fevd_mat = J(n*n, horizon + 1, 0)

    real scalar h, s, i, j, k
    real matrix Phi_s_P, cum_num, cum_denom

    // Cumulative numerator and denominator
    cum_num   = J(n, n, 0)   // cum_num[i,j] = Σ [Φ_s·P]²_{ij}
    cum_denom = J(n, 1, 0)   // cum_denom[i] = Σ_j cum_num[i,j]

    for (h = 0; h <= horizon; h++) {
        // Extract Φ_h · P from irf_mat column h+1
        // irf_mat[., h+1] = vec(Φ_h · P) where Φ_h·P is n x n
        Phi_s_P = rowshape(irf_mat[., h+1], n)

        // Accumulate squared responses
        cum_num = cum_num + Phi_s_P :^ 2

        // Compute denominator for each variable
        cum_denom = J(n, 1, 0)
        for (i = 1; i <= n; i++) {
            for (k = 1; k <= n; k++) {
                cum_denom[i] = cum_denom[i] + cum_num[i, k]
            }
        }

        // FEVD proportions
        for (i = 1; i <= n; i++) {
            for (j = 1; j <= n; j++) {
                real scalar row_idx
                row_idx = (i - 1) * n + j
                if (cum_denom[i] > 0) {
                    fevd_mat[row_idx, h+1] = cum_num[i, j] / cum_denom[i]
                }
            }
        }
    }
}


// ========================================================================
// FUNCTION: _rbfm_forecast_compute()
// Multi-step ahead forecast from the levels VAR companion form
// Includes forecast error standard errors from accumulated Σ_ε
// ========================================================================

void _rbfm_forecast_compute(
    real matrix Y_raw,
    real matrix F_plus,
    real matrix Sigma_e,
    real scalar n,
    real scalar p,
    real scalar ncols_z,
    real scalar steps,
    real matrix fcast_mat,
    real matrix fcast_se)
{
    real scalar nobs, p_levels, comp_dim, h, k, i
    real matrix Pi1, Pi2, Gamma_est, A_levs, A_companion, J_mat
    real matrix state, Phi_h, Sigma_accum, P_chol

    nobs = rows(Y_raw)

    // Extract Pi1, Pi2 from F_plus
    Pi1 = F_plus[., (ncols_z+1)::(ncols_z+n)]
    Pi2 = F_plus[., (ncols_z+n+1)::cols(F_plus)]

    if (ncols_z > 0) {
        Gamma_est = F_plus[., 1::ncols_z]
    }

    // Reconstruct levels VAR A matrices (same as in IRF)
    p_levels = p
    A_levs = J(n, n * p_levels, 0)

    A_levs[., 1::n] = A_levs[., 1::n] + Pi2
    A_levs[., 1::n] = A_levs[., 1::n] + Pi1
    if (p_levels >= 2) {
        A_levs[., (n+1)::(2*n)] = A_levs[., (n+1)::(2*n)] - Pi1
    }

    for (k = 1; k <= p - 2; k++) {
        real matrix Gamma_k
        Gamma_k = Gamma_est[., (n*(k-1)+1)::(n*k)]
        A_levs[., (n*(k-1)+1)::(n*k)] = A_levs[., (n*(k-1)+1)::(n*k)] + Gamma_k
        if (k + 1 <= p_levels) {
            A_levs[., (n*k+1)::(n*(k+1))] = A_levs[., (n*k+1)::(n*(k+1))] - 2 * Gamma_k
        }
        if (k + 2 <= p_levels) {
            A_levs[., (n*(k+1)+1)::(n*(k+2))] = A_levs[., (n*(k+1)+1)::(n*(k+2))] + Gamma_k
        }
    }

    // Build companion matrix
    comp_dim = p_levels * n
    A_companion = J(comp_dim, comp_dim, 0)
    A_companion[1::n, .] = A_levs
    if (p_levels >= 2) {
        A_companion[(n+1)::comp_dim, 1::((p_levels-1)*n)] = I((p_levels-1)*n)
    }

    // Selection matrix
    J_mat = (I(n), J(n, comp_dim - n, 0))

    // Build initial state from last p observations of Y_raw
    state = J(comp_dim, 1, 0)
    for (k = 1; k <= p_levels; k++) {
        if (nobs - k + 1 >= 1) {
            state[((k-1)*n+1)::(k*n)] = Y_raw[nobs - k + 1, .]'
        }
    }

    // Forecast and accumulate error variance
    fcast_mat = J(n, steps, 0)
    fcast_se  = J(n, steps, 0)
    Sigma_accum = J(n, n, 0)

    for (h = 1; h <= steps; h++) {
        state = A_companion * state
        fcast_mat[., h] = J_mat * state

        // Accumulate forecast error variance
        if (h == 1) {
            Phi_h = I(comp_dim)
        }
        else {
            Phi_h = I(comp_dim)
            for (i = 1; i <= h-1; i++) {
                Phi_h = Phi_h * A_companion
            }
        }
        Sigma_accum = Sigma_accum + J_mat * Phi_h * J_mat' * Sigma_e * (J_mat * Phi_h * J_mat')'

        for (i = 1; i <= n; i++) {
            fcast_se[i, h] = sqrt(Sigma_accum[i, i])
        }
    }
}


// ========================================================================
// FUNCTION: _rbfm_ic_table()
// Compute information criteria for all lag orders (for display)
// Returns matrix: rows = lag orders, cols = (p, AIC, BIC, HQ, T)
// ========================================================================

void _rbfm_ic_table(
    real matrix Y_raw,
    real scalar max_p)
{
    real scalar n, nobs, p, T, ic_aic, ic_bic, ic_hq, logdet_sigma
    real matrix Y, Z, W, X, F_tmp, E_tmp, Sigma
    real matrix ic_results
    real scalar row_idx

    n    = cols(Y_raw)
    nobs = rows(Y_raw)
    ic_results = J(max_p, 5, .)
    row_idx = 0

    for (p = 1; p <= max_p; p++) {
        T = nobs - 2 - p + 2
        if (T < 3*n) continue

        _rbfm_ecm_reparameterize(Y_raw, p, Y, Z, W)
        T = rows(Y)
        X = (Z, W)
        _rbfm_ols_var(Y, X, Z, W, F_tmp, E_tmp)

        Sigma = (E_tmp' * E_tmp) / T
        logdet_sigma = ln(det(Sigma))

        if (logdet_sigma == . | logdet_sigma == .) continue

        row_idx = row_idx + 1
        ic_results[row_idx, 1] = p
        ic_results[row_idx, 2] = logdet_sigma + 2 * n^2 * p / T
        ic_results[row_idx, 3] = logdet_sigma + n^2 * p * ln(T) / T
        ic_results[row_idx, 4] = logdet_sigma + 2 * n^2 * p * ln(ln(T)) / T
        ic_results[row_idx, 5] = T
    }

    if (row_idx > 0) {
        st_matrix("r(ic_table)", ic_results[1::row_idx, .])
    }
}


end