def run_single_regret(bandit_list, bandit_params, plays):
    """Simulate ``plays`` pulls and return the cumulative regret per step.

    At every step an arm is picked by ``sample_distributions_and_choose``
    from the current ``bandit_params``, that arm is pulled, and its
    parameter pair is updated with the outcome.

    Args:
        bandit_list: Sequence of bandit objects exposing ``get_prob()`` and
            ``pull_handle()``.
        bandit_params: Mutable sequence holding one pair of counters per
            bandit. It is updated IN PLACE: the first counter of the chosen
            arm is incremented when ``pull_handle()`` is truthy, the second
            one otherwise. (Presumably Beta-posterior success/failure
            counts -- confirm against ``sample_distributions_and_choose``.)
        plays: Number of pulls to simulate.

    Returns:
        A list of length ``plays``. Element ``i`` is
        ``(i + 1) * max(get_prob())`` minus the running sum of ``get_prob()``
        of the arms chosen during steps ``0..i``.

    Raises:
        ValueError: If ``bandit_list`` is empty (raised by ``max``).
    """
    bandit_probs = [bandit.get_prob() for bandit in bandit_list]
    opt_solution = max(bandit_probs)

    chosen = np.zeros(plays)
    sum_probs_chosen = 0
    for i in range(plays):
        index = sample_distributions_and_choose(bandit_params)
        sum_probs_chosen += bandit_probs[index]

        rewarded = bandit_list[index].pull_handle()
        first, second = bandit_params[index][0], bandit_params[index][1]
        if rewarded:
            bandit_params[index] = (first + 1, second)
        else:
            bandit_params[index] = (first, second + 1)

        chosen[i] = sum_probs_chosen

    # The optimal cumulative reward does not depend on the simulation, so it
    # is built once here rather than filled element by element in the loop.
    opt = np.arange(1, plays + 1) * opt_solution

    # Materialise a real list: on Python 3 ``map(sub, opt, chosen)`` would
    # hand callers a single-use iterator that cannot be indexed, measured
    # with ``len`` or iterated twice.
    return list(opt - chosen)
#7.9
#Plot params beforehand
# [web-page residue] Comment list
# [web-page residue] Table of contents