*! bbandits, version 2, 24.01.2026 *! Authors: Jan Kemper, Davud Rostam-Afschar *** Import python functions * Import python functions used for the bbandits package. * All function are in the file bbandits_functions.py which has to be in the ado folder (likely under subfolder "py") python: from bbandits_functions import * end *** stata subroutines *** epsilon greedy * e_greedy * Runs a batched epsilon-greedy multi-armed bandit algorithm via Python. * Parses true arm means and (optionally) reward standard deviations from Stata input, * calls a Python function that simulates arm choices and rewards over batches, * and returns the chosen arms, observed rewards, and batch indicators as a Stata dataset. * Also stores the effective decay rate of epsilon as an e-class result. program e_greedy, eclass * syntax syntax anything [, Batch(int 25) Size(int 100) Eps(real 0.1) Exploration(int 0) Decay(real 1) Standard_deviations(numlist)] capture matrix drop greedy * Python parsing python: list_of_true_mean = `anything' # Read in anything python: list_of_true_mean = list_of_true_mean.split() # Split based on tabs python: list_of_true_mean = np.array(list_of_true_mean, dtype = float) # save as float array python: list_of_standard_deviations = "`standard_deviations'" python: list_of_standard_deviations = list_of_standard_deviations.split() python: list_of_standard_deviations = np.array(list_of_standard_deviations, dtype = float) python: results = check_and_run_greedy(list_of_true_mean, list_of_standard_deviations, greedy_alg_k, `eps', `batch', `size', `exploration', `decay') python: keys = ["chosen_arm_list", "rewards_list", "batch_indicator_list"] python: values = [results.get(key) for key in keys] python: mat_res = np.array(values).T * save as a matrix in back into stata python: Matrix.store("greedy", mat_res) * Save other outputs as estimation results python: decay_res = results["decay_rate"] python: Matrix.store("decay_res", decay_res) * transform into stata dataset clear qui svmat greedy, names(col) rename (c1 c2 c3) (chosen_arm reward batch) * return estimates ereturn matrix decay_rate = decay_res end *** bernoulli thompson * thompson_bernoulli * Implements batched Bernoulli Thompson Sampling using a Python backend. * Takes true success probabilities for each arm, runs Bayesian updating of Beta priors, * and returns simulated arm choices, rewards, and batch indicators as a stata dataset * Also retrieves and stores the evolving alpha/beta posterior parameters and clipping rates. * Optionally produces plots of the Beta posterior densities for each arm by batch. program thompson_bernoulli, eclass syntax anything [, Batch(int 25) Size(int 100) Clipping(real 0.05) Exploration(int 0) Decay(real 1) Plot_thompson STacked TWoptions(string asis)] *drop resulting matrices - otherwise problems when dimensions of outcome matrix changes capture matrix drop beta_list capture matrix drop alpha_list capture matrix drop thompson capture matrix drop clipping_rate capture matrix drop true * python implementation python: list_of_true_mean = `anything' python: list_of_true_mean = list_of_true_mean.split() python: list_of_true_mean = np.array(list_of_true_mean, dtype = float) * run epsilon greedy algorithm from python python: results = bernoulli_thompson_batched_clipping(list_of_true_mean, `batch', `size', `clipping', `exploration', `decay') python: keys = ["chosen_arm_list", "rewards_list", "batch_indicator_list"] python: values = [results.get(key) for key in keys] python: mat_res = np.array(values).T * Get alpha and beta values into stata python: keys = ["alpha_values_list", "beta_values_list"] python: values = [results.get(key) for key in keys] python: mat_alpha = np.array(values[0]) python: mat_beta = np.array(values[1]) * save as a matrix in back into stata python: Matrix.store("true", list_of_true_mean) python: Matrix.store("thompson", mat_res) python: Matrix.store("alpha_list", mat_alpha) python: Matrix.store("beta_list", mat_beta) * Save other outputs as estimation results python: clipping_rate = results["clipping_rate_list"] python: Matrix.store("clipping_rate", clipping_rate) ereturn matrix decay_rate = clipping_rate * transform into stata dataset clear qui svmat thompson, names(col) rename (c1 c2 c3) (chosen_arm reward batch) di `twoptions' if "`plot_thompson'" == "" & "`stacked'" != ""{ di in red "please use stacked with plot_thompson. That is specify as option:" in ye " plot_thompson stacked" } if "`plot_thompson'" != ""{ * get batch size local batch_size = rowsof(beta_list) local arms = colsof(beta_list) di "Number of arms: " + `arms' di "Number of batches: " + `batch_size' * forvalues i =1/`batch_size' { local name = "t" + "`i'" di "`name'" if "`stacked'" == ""{ local vertical_line local beta_densities local legend_label forv j=1/`=`arms'' { local beta_densities `beta_densities' (function y=betaden(alpha_list[`i', `j'], beta_list[`i', `j'] , x), range(0 1)) local vertical_line `vertical_line' xline(`=true[`j',1]', lpattern(dash) lc(gray) lwidth(medthick)) local legend_label `legend_label' label(`j' "Arm `j'") } twoway `beta_densities' /// , /// `vertical_line' /// name("`name'", replace) legend( `legend_label') `=`twoptions'' } if "`stacked'" != ""{ local combine forv j=1/`=`arms'' { tw (function y = betaden(alpha_list[`i', `j'], beta_list[`i', `j'],x), range(0.01 0.99) lwidth(medthick)), xline(`=true[`j',1]', lpattern(dash) lc(black) lwidth(medthick)) /// ytitle("B(`=alpha_list[`i', `j']',`=beta_list[`i', `j']')" "density") ylabel(#0, nolabels nogrid) xlabel(, nogrid) xtitle(Share of successes) plotr(m(zero)) name("g_`j'_`i'", replace) nodraw `=`twoptions'' /**/ local combine "`combine' g_`j'_`i'" } gr combine `combine', xcommon col(1) iscale(1) name("combine_`i'", replace) } } } end *** Epsilon Greedy Monte Carlo simulation * epsilon_greedy_simulation * Performs a Monte Carlo study of the epsilon-greedy bandit algorithm. * Repeats the full adaptive experiment N times in Python and collects test statistics * (standard OLS and heteroskedastic robust OLS/BOLS) for treatment effect inference. * Outputs the simulated test statistics as a Stata dataset and can plot their * empirical distributions against the standard normal benchmark. program epsilon_greedy_simulation version 17 syntax anything [, Batch(int 25) Size(int 100) Eps(real 0.1) N(int 1000) Decay(real 1) Reference_arm(int 0), Arm(int 1), Test_value(real 0.0) Exploration(int 0) Plot TWoptions(string asis) STandard_deviations(numlist)] capture matrix drop greedy_simulation python: list_of_true_mean = `anything' python: list_of_true_mean = list_of_true_mean.split() python: list_of_true_mean = np.array(list_of_true_mean, dtype = float) * correct for non given standard deviations if "`standard_deviations'" == "" { python: list_of_standard_deviations = [1] * len(list_of_true_mean) python: print("Standard deviations not provided. For all arms, standard deviations for the normal distributions of rewards are set to 1.") } else { python: list_of_standard_deviations = "`standard_deviations'" python: list_of_standard_deviations = list_of_standard_deviations.split() python: list_of_standard_deviations = np.array(list_of_standard_deviations, dtype = float) } * run epsilon greedy algorithm from python python: results = simulation_greedy(`n' ,list_of_true_mean, list_of_standard_deviations, `eps', `batch', `size', `exploration', `decay', `reference_arm', `arm', `test_value') python: keys = ["beta_test_statistic", "bols_test_statistic"] python: values = [results.get(key) for key in keys] python: mat_res = np.array(values).T * save as a matrix in back into stata python: Matrix.store("greedy_simulation", mat_res) * transform into stata dataset clear qui svmat greedy_simulation, names(col) rename (c1 c2) (beta_test_statistic bols_test_statistic) tokenize `anything' local k : display %04.3f `1' local l : display %04.3f `2' if "`plot'" != ""{ tw (hist beta_test_statistic , width(0.1)) /// (kdensity beta_test_statistic) /// (function y=normalden(x, 0, 1) , range( -3.5 3.5)) /// , legend(off) name(OLS, replace) xtitle("OLS test statistic with true values " "`k', `l'" " batch size=`=`size'' with `=`batch'' batches, repetitions=`=`n''" "Kernel density of normal distribution at zero") `=`twoptions'' xline(0, lp(dash) lc(gs6)) tw (hist bols_test_statistic , width(0.1)) /// (kdensity bols_test_statistic) /// (function y=normalden(x, 0, 1) , range(-4 4)) /// , legend(off) name(BOLS, replace) xtitle("BOLS test statistic with true values " "`k', `l'" " batch size=`=`size'' with `=`batch'' batches, repetitions=`=`n''" "Kernel density of normal distribution at zero") `=`twoptions'' xline(0, lp(dash) lc(gs6)) } end *** Bernoulli Thompson Monte Carlo simulation * bernoulli_thompson_simulation * Monte Carlo simulation for the Bernoulli Thompson Sampling algorithm. * Repeats the adaptive allocation process many times using binary rewards, * computes OLS and heteroskedastic-robust BOLS test statistics for a target arm comparison, * and returns their sampling distributions in Stata. * Optional plots compare the empirical distributions to the normal reference. program bernoulli_thompson_simulation version 17 syntax anything [, Batch(int 25) Size(int 100) Clipping(real 0.05) Test_value(real 0.0) Reference_arm(int 0) Arm(int 1) Exploration(int 0) N(int 1000) Plot TWoptions(string asis)] // Here clipping noch einführen * drop resulting matrix capture matrix drop thompson_simulation * python python: list_of_true_mean = `anything' python: list_of_true_mean = list_of_true_mean.split() python: list_of_true_mean = np.array(list_of_true_mean, dtype = float) * run epsilon greedy algorithm from python python: results = simulation_thompson(`n' ,list_of_true_mean, `batch', `size', `clipping', `exploration', `reference_arm', `arm', `test_value') python: keys = ["beta_test_statistic", "bols_test_statistic"] python: values = [results.get(key) for key in keys] python: mat_res = np.array(values).T * save as a matrix in back into stata python: Matrix.store("thompson_simulation", mat_res) * transform into stata dataset clear qui svmat thompson_simulation, names(col) rename (c1 c2) (beta_test_statistic bols_test_statistic) tokenize `anything' local k : display %04.3f `1' local l : display %04.3f `2' if "`plot'" != ""{ tw (hist beta_test_statistic , width(0.1)) /// (kdensity beta_test_statistic) /// (function y=normalden(x, 0, 1) , range( -4 4)) /// , legend(off) name(OLS, replace) xtitle("OLS test statistic with true values " "`k', `l'" " batch size=`=`size'' with `=`batch'' batches, repetitions=`=`n''" "Kernel density of normal distribution at zero") `=`twoptions'' xline(0, lp(dash) lc(gs6)) tw (hist bols_test_statistic , width(0.1)) /// (kdensity bols_test_statistic) /// (function y=normalden(x, 0, 1) , range(-4 4)) /// , legend(off) name(BOLS, replace) xtitle("BOLS test statistic with true values " "`k', `l'" " batch size=`=`size'' with `=`batch'' batches, repetitions=`=`n''" "Kernel density of normal distribution at zero") `=`twoptions'' xline(0, lp(dash) lc(gs6)) } end *** Master command * bbandits_sim * Master wrapper that selects and runs one of the bandit procedures. * Depending on options, it executes either epsilon-greedy or Thompson Sampling, * in single-run mode or Monte Carlo simulation mode. * Performs basic input validation (e.g., probability bounds, minimum batch size) * and forwards all relevant parameters to the appropriate subroutine, * optionally triggering posterior or simulation result plots. program bbandits_sim syntax anything [, Batch(int 25) Size(int 100) Eps(real 0.1) Clipping(real 0.05) EXploration_phase(int 0) Test_value(real 0.0) Greedy Thompson Monte_carlo N(int 1000) Decay(real 1) Reference_arm(int 0) Arm(int 1) Plot_thompson STacked TWopts(string asis) STAndard_deviations(numlist)] if ("`greedy'" != "" & "`monte_carlo'" == "") | ("`greedy'" == "" & "`thompson'" == "" ){ /// Default value epsilon greedy display "Epsilon greedy" * Check if the sample size (or any relevant scalar) is too small if `size' < 2 { display as error "ERROR: For Epsilon Greedy, the batch size has to be at least 2." exit 198 } e_greedy "`anything'", batch(`batch') size(`size') eps(`eps') decay(`decay') standard_deviations(`standard_deviations') exploration(`exploration_phase') } if "`thompson'" != "" & "`monte_carlo'" == "" { *** detect syntax errors * Input greater than 1 foreach var of local anything { // Check if the current input value is greater than 1 if `var' > 1 { // Display an error message di as error "Error: Input value `" `var' "' is greater than 1. The Bernoulli Thompson algorithm only permits valid probabilities ([0,1])" // Exit the program exit 198 } } display "Thompson algorithm" thompson_bernoulli "`anything'", batch(`batch') size(`size') clipping(`clipping') exploration(`exploration_phase') decay(`decay') `plot_thompson' `stacked' tw(`"`twopts'"') } *** Monte Carlo * Epsilon greedy if "`monte_carlo'" != "" & "`greedy'" != "" { display "Epsilon greedy - Monte Carlo simulation" epsilon_greedy_simulation "`anything'", batch(`batch') size(`size') eps(`eps') test_value(`test_value') reference_arm(`reference_arm') arm(`arm') n(`n') exploration(`exploration_phase') plot tw(`"`twopts'"') standard_deviations(`standard_deviations') /// Always plot } if "`monte_carlo'" != "" & "`thompson'" != "" { display "Bernoulli thompson sampling - Monte Carlo simulation" bernoulli_thompson_simulation "`anything'", batch(`batch') size(`size') clipping(`clipping') test_value(`test_value') reference_arm(`reference_arm') arm(`arm') n(`n') plot tw(`"`twopts'"') /// Always plot } end