program define harwald, rclass
    version 15.0
    if "`e(cmd)'" != "harreg" {
        di as err "harwald may only be used after harreg"
        exit 301
    }

    /* `equalok' allows `=' inside the `anything' token so input like
       `harwald x1=0' reaches our parsing code instead of being
       rejected at the syntax line with the unhelpful Stata error
       "=exp not allowed". The bare-mode parser below detects `='
       and redirects users to the supported parens-mode form. */
    syntax [anything(name=coeflist equalok)] [, Level(cilevel) CRITDRAWS(integer 5000) SEED(integer 88741997)]

    if `critdraws' < 1000 local critdraws = 1000
    local lev = `level'/100

    * All internal scalars/matrices live in tempnames so they cannot
    * collide with the user's namespace.
    tempname b V Rmat cvec bR_t VR_t factor_t Fstat_t df2_t pval_t cv_t
    matrix `b' = e(b)
    matrix `V' = e(V)
    local est = lower(e(estimator))

    /* The seed is consumed only by harwald_sim (NW/QS Monte-Carlo
       p/cv path); EWC/EWP use analytic Ftail/invFtail. Reseed only
       when the simulator will run, so EWC/EWP `harwald' leaves the
       user's RNG state untouched. */
    if (`seed' != 0 & inlist("`est'","nw","qs")) {
        quietly set seed `seed'
    }
    local cn : colfullnames `b'
    local k = colsof(`b')

    * Build a normalized parallel list of coefficient names so user
    * input like `1.rep78` resolves to regress's base column
    * `1b.rep78` (which carries a structural zero, matching the
    * regress/test convention). The normalization strips a single
    * `b`, `bn`, or `bo` letter group appearing immediately before
    * a `.` that is preceded by a digit — i.e., factor-base markers
    * `1b.rep78`, `1bn.rep78`, `1bo.rep78` all map to `1.rep78`.
    * Identity on non-factor tokens.
    local cn_norm ""
    foreach name of local cn {
        local nname = ustrregexra("`name'", "([0-9]+)b[no]?\.", "$1.")
        local cn_norm "`cn_norm' `nname'"
    }

    /* Check if input contains parentheses (linear hypothesis syntax) */
    local has_parens = strpos("`coeflist'", "(")

    if `has_parens' > 0 {
        /* Refuse mixed list+parens. Any non-whitespace content before
           the first `(` is a bare-variable list mixed with
           parenthesized expressions; the parens-mode parser below
           only extracts content inside `(...)` and would silently
           drop the leading list portion. */
        local prefix = strtrim(substr("`coeflist'", 1, `has_parens'-1))
        if "`prefix'" != "" {
            di as err "cannot mix bare variable list and parenthesized expressions: `coeflist'"
            di as err `"use either "harwald x1 x2 ..." or "harwald (expr1) (expr2) ..."'"'
            exit 198
        }

        /* Linear hypothesis mode: parse expressions in parentheses */
        local exprs ""
        local rest "`coeflist'"
        local q = 0

        /* Extract all parenthesized expressions */
        while strpos("`rest'", "(") > 0 {
            local start = strpos("`rest'", "(")
            local end = strpos("`rest'", ")")
            if `end' == 0 {
                di as err "unmatched parenthesis in expression"
                exit 198
            }
            /* Validate any text between the previous `)' and this `('
               is whitespace only. Without this check the parser
               silently extracts the next `(...)' group and drops the
               intervening tokens (so `(x1) garbage (x2)' would test
               x1 and x2 and silently ignore `garbage'). */
            local gap = strtrim(substr("`rest'", 1, `start'-1))
            if "`gap'" != "" {
                di as err "stray tokens between parenthesized expressions: `gap'"
                di as err `"use "harwald (expr1) (expr2) ..." without text between groups"'
                exit 198
            }
            local expr = substr("`rest'", `start'+1, `end'-`start'-1)
            if strtrim("`expr'") == "" {
                di as err "empty expression in parentheses"
                exit 198
            }
            local rest = substr("`rest'", `end'+1, .)
            local q = `q' + 1
            local expr`q' "`expr'"
        }
        /* Validate trailing text after the final `)' is whitespace
           only. Same silent-drop pathology as the inter-group gap. */
        local tail = strtrim("`rest'")
        if "`tail'" != "" {
            di as err "stray tokens after final parenthesized expression: `tail'"
            exit 198
        }

        /* Build R matrix and c vector from expressions */
        matrix `Rmat' = J(`q', `k', 0)
        matrix `cvec' = J(`q', 1, 0)
        local testnames ""

        forvalues i = 1/`q' {
            local expr = strtrim("`expr`i''")
            local testnames "`testnames' (`expr')"

            /* The `lhs = rhs' form (e.g., `2*x1 - x2 = 1') is accepted.
               Split on the first `=' if present; process the LHS with
               sign +1 and the RHS with sign -1 (so the test is
               `lhs - rhs = 0'). Reject more than one `=' */
            local eqcount = length("`expr'") - length(subinstr("`expr'", "=", "", .))
            if `eqcount' > 1 {
                di as err "expression contains more than one `=': `expr'"
                exit 198
            }
            local eqpos = strpos("`expr'", "=")
            if `eqpos' > 0 {
                local lhs_str = substr("`expr'", 1, `eqpos'-1)
                local rhs_str = substr("`expr'", `eqpos'+1, .)
                if strtrim("`lhs_str'") == "" | strtrim("`rhs_str'") == "" {
                    di as err "empty side of `=' in expression: `expr'"
                    exit 198
                }
            }
            else {
                local lhs_str "`expr'"
                local rhs_str ""
            }

            /* Process LHS (sign=+1) then RHS (sign=-1). */
            forvalues side = 1/2 {
                if `side' == 1 {
                    local side_expr "`lhs_str'"
                    local side_sign = 1
                }
                else {
                    local side_expr "`rhs_str'"
                    local side_sign = -1
                    if "`side_expr'" == "" continue
                }

                /* Tokenize the expression into +/- separated terms.
                   First protect scientific-notation exponents (1e-3,
                   2.5E+4, ...) with sentinels so the next `-` -> `+-`
                   substitution does not split inside an exponent.
                   The `~M~`/`~P~` strings cannot appear in valid Stata
                   expressions; they are restored after the split. */
                local side_expr = subinstr("`side_expr'", " ", "", .)
                local side_expr = subinstr("`side_expr'", "e-", "e~M~", .)
                local side_expr = subinstr("`side_expr'", "e+", "e~P~", .)
                local side_expr = subinstr("`side_expr'", "E-", "E~M~", .)
                local side_expr = subinstr("`side_expr'", "E+", "E~P~", .)
                local side_expr = subinstr("`side_expr'", "-", "+-", .)

                /* Split by + */
                local terms ""
                local remaining "`side_expr'"
                while "`remaining'" != "" {
                    local pluspos = strpos("`remaining'", "+")
                    if `pluspos' == 0 {
                        if "`remaining'" != "" {
                            local terms "`terms' `remaining'"
                        }
                        local remaining ""
                    }
                    else if `pluspos' == 1 {
                        local remaining = substr("`remaining'", 2, .)
                    }
                    else {
                        local term = substr("`remaining'", 1, `pluspos'-1)
                        local terms "`terms' `term'"
                        local remaining = substr("`remaining'", `pluspos'+1, .)
                    }
                }

                /* Process each term. Restore scientific-notation
                   exponents that were sentinel-protected above. */
                foreach term of local terms {
                    local term = subinstr("`term'", "~M~", "-", .)
                    local term = subinstr("`term'", "~P~", "+", .)
                    local term = strtrim("`term'")
                    if "`term'" == "" continue

                    /* Parse coefficient*variable or just variable or constant */
                    local coef = `side_sign'
                    local varname "`term'"
                    local is_constant = 0

                    /* Check for negative sign */
                    if substr("`term'", 1, 1) == "-" {
                        local coef = -1 * `side_sign'
                        local term = substr("`term'", 2, .)
                        local varname "`term'"
                    }

                    /* Check for explicit coefficient (e.g., 2*x or 0.5*x) */
                    local starpos = strpos("`term'", "*")
                    if `starpos' > 0 {
                        local numpart = substr("`term'", 1, `starpos'-1)
                        local varname = substr("`term'", `starpos'+1, .)
                        local coef = `coef' * real("`numpart'")
                        if missing(`coef') {
                            di as err "invalid coefficient in expression: `numpart'"
                            exit 198
                        }
                    }
                    else {
                        /* Check if the term is a pure number (constant) */
                        local testnum = real("`varname'")
                        if !missing(`testnum') {
                            local is_constant = 1
                        }
                    }

                    /* Reject a literal `.' (missing-value) reaching this
                       point. The constant-detection above maps `.' to
                       testnum=., is_constant stays 0, and the coef-name
                       lookup below would error rc=111 "not found among
                       estimated coefficients" — misleading for a
                       missing-value input. Refuse explicitly. */
                    if "`varname'" == "." {
                        di as err "missing value (.) in restriction expression: `expr'"
                        exit 198
                    }

                    if `is_constant' {
                        /* This is a constant term - add to c vector (move to RHS) */
                        /* If expression has +constant, we test Rb = constant, so c = constant */
                        /* If expression has -constant, we test Rb = -constant */
                        matrix `cvec'[`i', 1] = `cvec'[`i', 1] - `coef' * real("`varname'")
                    }
                    else {
                        /* Find variable position. Direct match against
                           e(b)'s colnames first; on miss, the
                           normalized list so factor inputs like
                           `1.rep78` resolve to base columns
                           `1b.rep78`. */
                        local varname = strtrim("`varname'")
                        local pos : list posof `"`varname'"' in cn
                        if `pos' == 0 {
                            local pos : list posof `"`varname'"' in cn_norm
                        }
                        if `pos' == 0 {
                            di as err "`varname' not found among estimated coefficients"
                            exit 111
                        }

                        /* Add to R matrix */
                        matrix `Rmat'[`i', `pos'] = `Rmat'[`i', `pos'] + `coef'
                    }
                }
            }
        }
        local varlist = strtrim("`testnames'")
    }
    else {
        /* Original syntax: list of variable names */
        local varlist "`coeflist'"
        if "`varlist'" == "" {
            /* Default: test every estimated slope. Skip `_cons',
               and skip factor base levels (`Nb.var', `Nbn.var',
               `Nbo.var') and `_rmcoll' omitted markers (`o.x',
               `oL.x', etc.) — those sit at structurally-zero columns
               of `e(b)' / `e(V)' and adding them to R would zero out
               the corresponding row/col of VR_t, making syminv(VR_t)
               return a g-inverse that hides the degeneracy and
               inflates q. */
            foreach v of local cn {
                if "`v'" == "_cons" continue
                if regexm("`v'", "^o([LFDS][0-9]*)*\.") continue
                if regexm("`v'", "[0-9]+b[no]?\.") continue
                local varlist "`varlist' `v'"
            }
        }

        /* Reject bare equality syntax (e.g., `harwald x1=0',
           `harwald x1=x2'). Bare mode is a list of coefficient names
           and the lookup below treats `x1=0' as a single token,
           failing with the confusing "not found among estimated
           coefficients" error. The parens-mode parser supports
           equality restrictions; redirect users there. */
        if strpos("`varlist'", "=") > 0 {
            di as err "bare equality syntax not supported in this mode"
            di as err `"wrap each restriction in parentheses: "harwald (x1=0)" or "harwald (x1=x2)""'
            exit 198
        }

        local selrows
        local dropped_zero ""
        foreach v of local varlist {
            /* Factor base levels (`Nb.var', `Nbn.var', `Nbo.var')
               and `_rmcoll' omitted markers (`o.x', `oL.x', ...)
               sit at structural-zero columns of `e(b)' / `e(V)';
               testing them yields a zero row/col of `VR_t' and an
               inflated q. Drop and log (the default path at lines
               258-263 skips silently; here the user typed them). */
            if regexm("`v'", "^o([LFDS][0-9]*)*\.") | ///
               regexm("`v'", "[0-9]+b[no]?\.") {
                local dropped_zero "`dropped_zero' `v'"
                continue
            }
            local pos : list posof `"`v'"' in cn
            if `pos' == 0 {
                local pos : list posof `"`v'"' in cn_norm
            }
            if `pos' == 0 {
                di as err "`v' not found among estimated coefficients"
                exit 111
            }
            /* Resolved column may be a structural zero even when the
               typed name lacked the `b'/`o.' marker (e.g. `1.rep78'
               -> base `1b.rep78' via `cn_norm', or an `_rmcoll'
               drop with no `o.' alias). Check `e(b)' / `e(V)'. */
            local b_val = `b'[1, `pos']
            local v_val = `V'[`pos', `pos']
            if `b_val' == 0 & `v_val' == 0 {
                local matched : word `pos' of `cn'
                local dropped_zero "`dropped_zero' `v' (-> `matched')"
                continue
            }
            local selrows "`selrows' `pos'"
        }
        if "`dropped_zero'" != "" {
            di as txt "note: structural-zero column(s) dropped from joint test (factor base / collinearity-omitted):`dropped_zero'"
        }

        local q = wordcount("`selrows'")
        if (`q' == 0) {
            if "`coeflist'" == "" {
                /* No args + model has no estimable slopes (constant
                   only, or every slope is base/omitted): refuse. */
                di as err "no coefficients to test (model has no estimable slopes)"
                exit 198
            }
            /* User named columns, all filtered as structural zeros.
               Match Stata `test 1b.rep78' (Constraint 1 dropped;
               F(0, ...) = .; r(F)=.; r(df)=0; rc=0). */
            return scalar F = .
            return scalar q = 0
            return scalar df = 0
            return scalar df_r = .
            return scalar p = .
            return scalar cv = .
            exit 0
        }
        matrix `Rmat' = J(`q', `k', 0)
        matrix `cvec' = J(`q', 1, 0)
        local i = 1
        foreach pos of local selrows {
            matrix `Rmat'[`i', `pos'] = 1
            local ++i
        }
    }

    tempname Wald mtmp
    matrix `bR_t' = `Rmat'*`b'' - `cvec'
    matrix `VR_t' = `Rmat'*`V'*`Rmat''

    /* Drop rows with zero variance: VR_t[i,i] = 0 covers both
       no-coefficient-term restrictions (Rmat row is all zero, e.g.
       `(1=0)') and parens-mode hits on structural-zero columns of
       `V' (factor base / `_rmcoll' omitted; bare-mode names were
       regex-filtered above). syminv(VR_t) on such rows returns a
       g-inverse that leaves q inflated, silently understating
       F = W/q. Stata `test' drops the same way with the
       "Constraint N dropped" notice. */
    local q_orig = `q'
    local keep_rows ""
    forvalues __i = 1/`q_orig' {
        if `VR_t'[`__i', `__i'] != 0 {
            local keep_rows "`keep_rows' `__i'"
        }
    }
    local q_testable : word count `keep_rows'
    if `q_testable' < `q_orig' {
        if `q_testable' == 0 {
            di as txt "note: no testable restriction (zero variance for every restriction); F, p, crit. value left missing"
            return scalar F = .
            return scalar q = 0
            return scalar df = 0
            return scalar df_r = .
            return scalar p = .
            return scalar cv = .
            exit 0
        }
        di as txt "note: " `q_orig' - `q_testable' " restriction(s) not testable against e(V) — dropped from joint test (testable q = " `q_testable' " of " `q_orig' ")"
        tempname Rmat_t cvec_t
        matrix `Rmat_t' = J(`q_testable', `k', 0)
        matrix `cvec_t' = J(`q_testable', 1, 0)
        local new_i = 0
        foreach old_i of local keep_rows {
            local ++new_i
            forvalues __j = 1/`k' {
                matrix `Rmat_t'[`new_i', `__j'] = `Rmat'[`old_i', `__j']
            }
            matrix `cvec_t'[`new_i', 1] = `cvec'[`old_i', 1]
        }
        matrix `Rmat' = `Rmat_t'
        matrix `cvec' = `cvec_t'
        matrix `bR_t' = `Rmat'*`b'' - `cvec'
        matrix `VR_t' = `Rmat'*`V'*`Rmat''
        local q = `q_testable'
    }

    /* Reduce q to rank(Rmat) when the row set is linearly dependent
       (duplicates, sum-to-zero pairs). `syminv(VR_t)' already gives
       the rank-r Wald scalar on the row column space; only q in
       F = W/q and `Ftail(q, ...)' / `invFtail(q, ...)' must reflect
       the rank. Rank is checked on `Rmat' not `VR_t' so that
       insufficient-estimator-df cases (rank-deficient `V' on its
       active columns, e.g. EWC nu < q) remain governed by the
       `df_fb - q + 1 <= 0' refusal below, not silently absorbed. */
    mata: st_local("_har_rmat_rank", strofreal(rank(st_matrix("`Rmat'"))))
    local q_rank = `_har_rmat_rank'
    if `q_rank' < `q' {
        di as txt "note: restriction set is rank-deficient (rank " `q_rank' " < q = " `q' "); reducing joint-test q to " `q_rank'
        local q = `q_rank'
    }

    matrix `mtmp' = `bR_t''*syminv(`VR_t')*`bR_t'
    scalar `Wald' = `mtmp'[1,1]
    scalar `factor_t' = 1
    if inlist("`est'","ewc","ewp") {
        scalar `factor_t' = (e(df_fb) - `q' + 1)/e(df_fb)
    }
    scalar `Fstat_t' = `factor_t'*`Wald'/`q'

    scalar `df2_t'  = .
    scalar `pval_t' = .
    scalar `cv_t'   = .

    if inlist("`est'","ewc","ewp") {
        scalar `df2_t' = e(df_fb) - `q' + 1
        if missing(`df2_t') | `df2_t'<=0 {
            di as err "insufficient fixed-b degrees of freedom for joint test"
            exit 498
        }
        scalar `pval_t' = Ftail(`q', `df2_t', `Fstat_t')
        scalar `cv_t'   = invFtail(`q', `df2_t', 1-`lev')
    }
    else {
        local S = e(bw)
        local T = e(N)
        /* Tukey kernel-equivalent df nu = T / (S * integral_k_squared)
           integral_k_squared = 2/3 (Bartlett) -> nu = (3/2)*T/S
           integral_k_squared = 6/5 (QS)      -> nu = (5/6)*T/S
           Used for the printed header denominator df and the q > nu
           sanity warning. The p-value and critical value below come
           from the simulator (`harwald_sim'), not from F(q, nu) */
        local _kernel_constant = cond("`est'"=="nw", 3/2, 5/6)
        scalar `df2_t' = ceil(`_kernel_constant' * `T' / `S')
        if (`q' > `S' | `q' > `df2_t') {
            di as txt "warning: q = `q' exceeds " ///
                cond(`q' > `S', "truncation parameter S = `S'", "kernel-equivalent df = " + string(`df2_t')) ///
                "; the simulated null distribution may be unreliable for joint tests of this dimension"
        }
        tempname cvF_sim_t pF_sim_t
        mata: harwald_sim("`est'", `S', `T', `critdraws', `q', st_numscalar("`Fstat_t'"), `lev', "`cvF_sim_t'", "`pF_sim_t'")
        scalar `cv_t'   = `cvF_sim_t'
        scalar `pval_t' = `pF_sim_t'
    }

    local df2_int = floor(`df2_t')
    local _p_label = cond(inlist("`est'","nw","qs"), "p (sim)", "p")
    local varlist = strtrim("`varlist'")
    di as txt "HAR Wald test of coefficients: `varlist'"
    di as txt "F(" as res `q' as txt "," as res `df2_int' as txt ")" ///
        " = " as res %9.2f `Fstat_t' as txt ",  `_p_label' = " as res %6.4f `pval_t' ///
        as txt "  (crit. val. @ " %4.1f (`lev'*100) "% : " as res %6.4f `cv_t' as txt ")"

    return scalar F = `Fstat_t'
    return scalar q = `q'
    return scalar df = `q'
    return scalar df_r = `df2_t'
    return scalar p = `pval_t'
    return scalar cv = `cv_t'
end