def run(self, T, model):
    """Run Thompson sampling with Beta posteriors for T rounds.

    Each round draws one value per arm from Beta(successes, failures),
    plays the arm selected by ``argmax_rand`` on those draws, and updates
    that arm's counts with the outcome sampled from ``model``.

    Args:
        T: Number of rounds to play.
        model: Bandit model. This method reads ``model.K`` (used as the
            length of the per-arm count arrays), calls
            ``model.sample_multiple(arm, 1)`` and adds the result to the
            arm's success count, and reads ``model.expected_rewards``
            (indexed by arm).

    Returns:
        ``max(model.expected_rewards)`` minus the expected reward of the
        arm stored in ``self.best_action``.

    Side effects:
        Overwrites ``self.trials``, ``self.success`` and
        ``self.best_action``.
    """
    # Starting at 2 trials / 1 success gives every arm 1 success and
    # 1 failure, i.e. Beta(1, 1) parameters, so np.random.beta never
    # receives a zero parameter.
    self.trials = np.full(model.K, 2, dtype=int)
    self.success = np.full(model.K, 1, dtype=int)

    # `range` instead of the Python-2-only `xrange`, so the method runs on
    # both Python 2 and Python 3.
    for _ in range(T):
        fails = self.trials - self.success
        # One posterior draw per arm (vectorised over all arms).
        theta = np.random.beta(self.success, fails)
        # NOTE(review): argmax_rand is defined outside this block;
        # presumably argmax with random tie-breaking -- confirm.
        arm = argmax_rand(theta)
        self.trials[arm] += 1
        # NOTE(review): assumes sample_multiple(arm, 1) yields the number
        # of successes (0 or 1) from a single pull -- confirm against model.
        self.success[arm] += model.sample_multiple(arm, 1)

    # Empirical success rate per arm (including the initial pseudo-counts).
    mu = np.true_divide(self.success, self.trials)
    self.best_action = argmax_rand(mu)
    return max(model.expected_rewards) - model.expected_rewards[self.best_action]
评论列表
文章目录