clear all

* Point the PERSONAL ado directory at the folder that holds the package's ado
* files (the "ado" folder of the GitHub repository) so that the bbandits
* commands can be run immediately.
* NOTE: the path below is machine-specific - adjust it to your own checkout.
*sysdir set PERSONAL "C:\Users\JKP\Dropbox\batched bandits\review_package\bbandits\ado"
sysdir set PERSONAL "C:\Users\JKP\Dropbox\batched bandits\submission_acceptance\package\ado"

* Specify a log file. Close any log left open by an earlier (aborted) run
* first; otherwise -log using- stops because a log is already open.
capture log close
log using example.log, replace

**********************************************************************************
****** Reproduce paper specifications
**********************************************************************************

*** Figure 1 - Graph will always look slightly different because there is no seed for the bbandits_sim function
bbandits_sim 0.5 0.4 0.3, size(100) batch(100) clipping(0.05) decay(0.9) thompson

**** Cumulative rewards *****
set seed 12345
local batch_length = 100    // number of batches, matches batch(100) above

* The classic experiment: three Bernoulli arms with fixed, equal allocation
* Generate Bernoulli samples with probability 0.5
gen bernoulli1 = runiform() < 0.5
* Generate Bernoulli samples with probability 0.4
gen bernoulli2 = runiform() < 0.4
* Generate Bernoulli samples with probability 0.3
gen bernoulli3 = runiform() < 0.3

gen reward_classic = .

* Split the sample into three (almost) equal parts, one per arm. The cut
* points are derived from _N instead of being hard-coded, so the split stays
* equal for any simulated sample size (3333 / 3333 / 3334 when _N == 10000).
local n_total = _N
local cut1    = floor(`n_total' / 3)
local cut2    = 2 * `cut1'
local start2  = `cut1' + 1
local start3  = `cut2' + 1
* First third receives arm 1
replace reward_classic = bernoulli1 in 1/`cut1'
* Second third receives arm 2
replace reward_classic = bernoulli2 in `start2'/`cut2'
* Remaining observations receive arm 3
replace reward_classic = bernoulli3 in `start3'/`n_total'

** Benchmark: only the optimal arm (success probability 0.5) is ever played
gen optimal = bernoulli1

* Shuffle the classic rewards so that the arms are spread over all batches.
* NOTE(review): shufflevar is not a built-in Stata command - presumably the
* user-written SSC package; confirm it is installed before running.
shufflevar reward_classic, dropold
rename reward_classic_shuffled reward_classic

* One row per batch holding the batch means of the three reward series
collapse (mean) reward (mean) reward_classic (mean) optimal, by(batch)

* Create a cumulative sum and count of rewards
* Batched bandit
gen cumulative_sum = sum(reward)
gen cumulative_count = _n
* Calculate the cumulative mean
gen cumulative_reward = cumulative_sum / cumulative_count

* Classic experiment
gen cumulative_sum_classic = sum(reward_classic)
gen cumulative_count_classic = _n
* Calculate the cumulative mean
gen cumulative_reward_classic = cumulative_sum_classic / cumulative_count_classic

* Optimal arm only
gen cumulative_sum_optimal = sum(optimal)
gen cumulative_count_optimal = _n
gen cumulative_reward_optimal = cumulative_sum_optimal / cumulative_count_optimal

set scheme sj
* Upper end of the x axis labels is (number of batches - 1)
local batch_length = `batch_length' -1

* Generate the line plot
twoway (line cumulative_reward batch) ///
       (line cumulative_reward_classic batch) ///
       (line cumulative_reward_optimal batch), ///
       xlabel(0(10)`batch_length') ylabel(0.30(0.05)0.60) ///
       title("Cumulative Mean Reward by Batch") ///
       xtitle("Batch") ytitle("Cumulative Mean Reward") ///
       legend(order(1 "Bandit" 2 "Classic Experiment" 3 "Optimal only")) ///
       graphregion(color(white)) plotregion(color(white))

**** Figure 2 - Graph will always look slightly different because there is no seed for the bbandits_sim function
bbandits_sim 0.5 0.4 0.3, size(200) batch(10) clipping(0.1) thompson plot_thompson

***** Empirical examples from section 6 ******

* Kasy, M. and Sautmann, A. (2021), Adaptive Treatment Assignment in Experiments
* for Policy Choice. Econometrica, 89: 113-132. https://doi.org/10.3982/ECTA17527

* Generate Table on page 20 and Figure 3 and 4
* (forward slashes in paths work on Windows, macOS and Linux)
use "example data/kasy_sautmann_2021.dta", clear
bbandits outcome treatment date , twoptions_sharebybatch(ylabel(0(0.1)0.6))
/*
graph export "figures/ShareArmSelected_kasy_sautmann.png", replace width(1280) name(ShareArmSelected)
graph export "figures/ShareByBatch_kasy_sautmann.png", replace width(1280) name(ShareByBatch)
graph export "figures/StackedShareArmSelected_kasy_sautmann.png", replace width(1280) name(StackedShareByBatch)
graph export "figures/BOLS_kasy_sautmann.png", replace width(1280) name(BOLS)
graph export "figures/OLS_kasy_sautmann.png", replace width(1280) name(OLS)
graph export "figures/CumSharesByBatch_kasy_sautmann.png", replace width(1280) name(CumSharesByBatch)
*/

* Generates among others Figure 5 and Figure 6
use "example data/gaul_et_al_2024.dta", clear
bbandits reward selected trial , twoptions_sharebybatch(ylabel(0(0.1)0.6))
/*
graph export "figures/ShareArmSelected_gaul_et_al.png", replace width(1280) name(ShareArmSelected)
graph export "figures/OLS_gaul_et_al.png", replace width(1280) name(OLS)
graph export "figures/CumSharesByBatch_gaul_et_al.png", replace width(1280) name(CumSharesByBatch)
*/

**********************************************************************************
****** Showcase package functions
**********************************************************************************
set scheme stcolor // Use colored graphic scheme

*********************************************************
*** bbandits_sim simulation command                   ***
*********************************************************

*** Epsilon greedy ****
* Default - greedy
bbandits_sim 1 1 // most simple
* with many arms and adjusted epsilon rate
bbandits_sim 1 1 2 5 6, size(100) batch(10) eps(0.3) // size not divisible by 10
* add exploration phase and decay rate for greedy algorithm
bbandits_sim 1 1 2, size(200) batch(10) eps(0.2) decay(0.9) exploration(3) greedy
matrix list e(decay_rate) // bbandits_sim returns the decay rate of epsilon_greedy
* Change standard deviation
bbandits_sim 1 1 2, standard_deviations(1 2 3)

*** Bernoulli Thompson Sampling ****
bbandits_sim 0.5 0.5 , size(100) batch(10) clipping(0.05) thompson
* many arms
bbandits_sim 0.1 0.5 0.3 0.2, size(500) batch(10) clipping(0.1) thompson

*** Monte Carlo simulation ***
* Epsilon Greedy
bbandits_sim 1 1, monte_carlo greedy reference_arm(0) arm(1) standard_deviations(1 2) test_value(0) n(1000) eps(0.2) // works
* Thompson Sampling
bbandits_sim 0.5 0.5, monte_carlo thompson n(2000)

*******************************************************************
******* bbandits - Analysis/Inference command
*******************************************************************

*** Inference
bbandits_sim 1 2 1 , greedy eps(0.2) standard_deviations(1 1 1) // From epsilon_greedy algorithm
bbandits reward chosen_arm batch // analyse data with bbandits
* change reference arm
bbandits reward chosen_arm batch, reference_arm(1) no_plot // reference arm goes from 0 to k
* simulate data with thompson algorithm
bbandits_sim 0.5 0.5 0.3 , size(100) batch(10) clipping(0.1) thompson
* Analyse and plot beta distributions from thompson sampling
bbandits reward chosen_arm batch, plot_thompson reference_arm(0)
* analyse weights
matrix list e(batched_ols_weights)
matrix bols_by_weights = e(batched_ols_weights)' * e(batch_ols_coefficients)
* multiply weights times batch OLS estimates to get BOLS --> weighted average
matrix list bols_by_weights // BOLS estimates are on the diagonal

*****************************************************************************
******** bbandits_initialize/bbandits_update - Run your own adaptive experiment
*****************************************************************************

******** Epsilon Greedy **********************
******* Conduct your own experiment - Fictitious school example section 7 **************
clear
set obs 1000 // Create 1000 observations - defines the total size of the experiment
* Label the units "school_1", "school_2", ..., "school_1000" in one pass
gen ID = "school_" + string(_n)

****** Use bbandits_initialize ********
bbandits_initialize, batches(10) arms(3) exploration_phase(2) // 10 equally sized batches, 3 treatment arms, 2 exploration periods

***** Assign treatment and observe rewards in exploration phase ******
* rand is one uniform draw per unit; a unit's reward is 1 when its draw falls
* below the threshold of the arm it was assigned to, and 0 otherwise.
generate rand = runiform()
replace reward = .
forvalues i = 1/2 {
    replace reward = 0 if batch == `i'
    replace reward = 1 if rand < 0.4 & batch == `i' & chosen_arm == 1
    replace reward = 1 if rand < 0.5 & batch == `i' & chosen_arm == 2
    replace reward = 1 if rand < 0.6 & batch == `i' & chosen_arm == 3
}
bbandits_update reward chosen_arm_numeric batch, greedy eps(0.3) // excel export option can be added

***** Adaptive phase: observe the rewards of batch i, then update *****
forvalues i = 3/10 {
    display `i'
    replace reward = 0 if batch == `i'
    replace reward = 1 if rand < 0.4 & batch == `i' & chosen_arm_numeric == 0
    replace reward = 1 if rand < 0.5 & batch == `i' & chosen_arm_numeric == 1
    replace reward = 1 if rand < 0.6 & batch == `i' & chosen_arm_numeric == 2
    * capture keeps the script running if the update fails, but a failure is
    * reported instead of being swallowed silently
    capture bbandits_update reward chosen_arm_numeric batch, greedy eps(0.3)
    if _rc {
        display as text "note: bbandits_update returned code " _rc " after batch `i'"
    }
}
bbandits reward chosen_arm_numeric batch

********* Thompson Sampling with many arms *********************
* simulate and test from thompson
clear
set obs 5000 // Create 5000 observations
* Label the units "school_1", "school_2", ..., "school_5000" in one pass
gen ID = "school_" + string(_n)

****** Use bbandits_initialize ********
bbandits_initialize, batches(10) arms(15) exploration_phase(2) // 10 equally sized batches, 15 treatment arms, 2 exploration periods

***** Assign treatment and observe rewards in exploration phase ******
generate rand = runiform()
replace reward = .
* Exploration phase (batches 1-2) of the 15-arm Thompson sampling example.
* Arm a (a = 0, ..., 14) yields reward 1 when the unit's uniform draw rand is
* below 0.20 + 0.05 * a, i.e. thresholds 0.20, 0.25, ..., 0.90.
* The threshold is written as an integer ratio so that it is exactly the same
* number as the decimal literal (0.65, 0.70, ...) would be.
forvalues i = 1/2 {
    replace reward = 0 if batch == `i'
    forvalues a = 0/14 {
        replace reward = 1 if rand < (20 + 5 * `a') / 100 & batch == `i' & chosen_arm_numeric == `a'
    }
}
bbandits_update reward chosen_arm_numeric batch, thompson clipping(0.02)

* Adaptive phase: observe the rewards of batch i, then update the assignment.
forvalues i = 3/10 {
    display `i'
    replace reward = 0 if batch == `i'
    forvalues a = 0/14 {
        replace reward = 1 if rand < (20 + 5 * `a') / 100 & batch == `i' & chosen_arm_numeric == `a'
    }
    * Update with Thompson sampling, using the same options as the first
    * update of this example (the loop previously called the epsilon-greedy
    * update, a leftover from the epsilon-greedy example).
    * capture keeps the script running if the update fails, but a failure is
    * reported instead of being swallowed silently.
    capture bbandits_update reward chosen_arm_numeric batch, thompson clipping(0.02)
    if _rc {
        display as text "note: bbandits_update returned code " _rc " after batch `i'"
    }
}
bbandits reward chosen_arm_numeric batch //, plot_thompson

****************** successive arm elimination (SAE) ******************************
clear
set obs 1000 // Create 1000 observations
* Label the units "school_1", "school_2", ..., "school_1000" in one pass
gen ID = "school_" + string(_n)

bbandits_initialize, batches(5) arms(3) sae // 5 batches, 3 treatment arms, successive arm elimination
* Show the global that is passed to bbandits_update via active_arms() below
di "$active_arms_macro"

* generate some rewards
generate rand = runiform()
replace reward = .
* ---- SAE round 1 ---------------------------------------------------------
* Rewards for the first batch. In this round the arm is read from chosen_arm
* (coded 1, 2, 3); later rounds read chosen_arm_numeric (coded 0, 1, 2).
* The third arm pays 1.5 instead of 1.
replace reward = 0   if batch == 1
replace reward = 1   if rand < 0.4 & batch == 1 & chosen_arm == 1
replace reward = 1   if rand < 0.5 & batch == 1 & chosen_arm == 2
replace reward = 1.5 if rand < 0.8 & batch == 1 & chosen_arm == 3

* Update according to the sequential arm elimination algorithm
bbandits_update reward chosen_arm_numeric batch, sae active_arms("$active_arms_macro") batch_sae(5)

* ---- SAE rounds 2-4 ------------------------------------------------------
* Each round: fill in the rewards of the current batch, then update with the
* currently stored set of active arms.
forvalues round = 2/4 {
    replace reward = 0   if batch == `round'
    replace reward = 1   if rand < 0.4 & batch == `round' & chosen_arm_numeric == 0
    replace reward = 1   if rand < 0.5 & batch == `round' & chosen_arm_numeric == 1
    replace reward = 1.5 if rand < 0.8 & batch == `round' & chosen_arm_numeric == 2

    * The active arms are printed once, before the second update
    if `round' == 2 {
        di "$active_arms_macro"
    }

    bbandits_update reward chosen_arm_numeric batch, sae active_arms("$active_arms_macro") batch_sae(5)
}
** warning and exit if optimal arm was already detected

log close