*! 1.1.0 Ariel Linden 08Jan2026 // fixed by() to allow a string variable and use value labels *! 1.0.0 Ariel Linden 22Dec2025 program define adtest, rclass version 11.0 syntax varname [if] [in], BY(varname) /// [Reps(integer 1000) SEED(string) Power(real 2)] tempvar touse marksample touse, novarlist markout `touse' `varlist' // handle the by() variable separately to avoid markout issues with strings qui replace `touse' = 0 if missing(`by') local depvar `varlist' local origby "`by'" // check if there are any observations in the sample qui count if `touse' if r(N) == 0 { di as error "no observations in the sample" exit 2000 } // get distinct values of by() variable qui levelsof `by' if `touse', local(by_vals) local num_groups : word count `by_vals' if `num_groups' != 2 { di as error "{bf:by()} variable must have exactly 2 distinct values" if `num_groups' == 1 di as error "found 1 group: `by_vals'" else di as error "found `num_groups' groups: `by_vals'" exit 420 } // extract the two group values local group1_str : word 1 of `by_vals' local group2_str : word 2 of `by_vals' // check if by() variable needs to be converted to numeric capture confirm numeric variable `by' if _rc { // variable is string - create numeric version tempvar numby qui gen `numby' = . qui replace `numby' = 0 if `origby' == "`group1_str'" & `touse' qui replace `numby' = 1 if `origby' == "`group2_str'" & `touse' // use original string values for display local group1 "`group1_str'" local group2 "`group2_str'" local by "`numby'" } else { // variable is numeric - get value labels if they exist capture local label1 : label (`by') `group1_str' capture local label2 : label (`by') `group2_str' // use labels if available, otherwise use numeric values if "`label1'" == "" local label1 "`group1_str'" if "`label2'" == "" local label2 "`group2_str'" local group1 "`label1'" local group2 "`label2'" } // set seed if "`seed'" != "" { set seed `seed' } local inis `=c(seed)' // run AD test mata: ad_by_test2("`depvar'", "`by'", "`touse'", `reps', `power') // display results di _n "Two-sample Anderson-Darling test for equality of distribution functions" di "{hline 71}" di "Outcome: `depvar'" di "Groups: `origby' (`group1' vs `group2')" di "{hline 25}" di "Test Statistic: " %6.4f r(stat) di "{it:P}-value: " %6.4f r(p) " (based on `reps' permutations)" // save return values return scalar stat = r(stat) return scalar p = r(p) return scalar reps = `reps' return scalar power = `power' return local group1 = "`group1'" return local group2 = "`group2'" return local by = "`origby'" // Clean up temporary variable if created if "`numby'" != "" { capture drop `numby' } end version 11.0 mata: mata clear void ad_by_test2(depvar, byvar, touse, reps, power) { // grab data from Stata st_view(y = ., ., depvar, touse) st_view(g = ., ., byvar, touse) // remove any rows where either variable is missing nonmiss = (y :!= . :& g :!= .) y = select(y, nonmiss) g = select(g, nonmiss) // get groups groups = uniqrows(g) if (rows(groups) != 2) { errprintf("Need exactly 2 groups\n") st_numscalar("r(stat)", .) st_numscalar("r(p)", .) return } g1 = groups[1] g2 = groups[2] // extract group data v1 = select(y, g :== g1) v2 = select(y, g :== g2) // observed statistic obs = ad_compute(v1, v2, power) st_numscalar("r(stat)", obs) // permutation test N = rows(y) n1 = rows(v1) n2 = rows(v2) count = 0 for (r = 1; r <= reps; r++) { // shuffle all values idx = order(runiform(N, 1), 1) shuf = y[idx] // calculate the stat for permutation stat = ad_compute(shuf[1::n1], shuf[(n1+1)::N], power) if (stat >= obs) count++ } p = (count + 1) / (reps + 1) st_numscalar("r(p)", p) } real scalar ad_compute(a, b, pwr) { n1 = rows(a); n2 = rows(b); n = n1 + n2 d = a \ b e = J(n1,1,1/n1) \ J(n2,1,0) f = J(n1,1,0) \ J(n2,1,1/n2) idx = order(d,1) d = d[idx]; e = e[idx]; f = f[idx] out = 0; Ec = 0; Fc = 0; Gc = 0; dup = 1 for (i=1; i<=n-1; i++) { Ec = Ec + e[i]; Fc = Fc + f[i]; Gc = Gc + 1/n sd = sqrt((2*Gc*(1-Gc))/n) h = abs(Fc - Ec) if (d[i] != d[i+1]) { out = out + ((h/sd)^pwr) * dup dup = 1 } else { dup = dup + 1 } } return(out) } end