def run(self, T, model):
    """Split T trials evenly over the model's K actions and return the regret.

    Each of the ``model.K`` actions is sampled ``T // model.K`` times via
    ``model.sample_multiple``. The per-action results are divided by the
    number of trials and stored in ``self.u``; the index chosen by
    ``argmax_rand(self.u)`` is stored in ``self.best_action``.

    Args:
        T: Total trial budget.
        model: Object exposing ``K``, ``sample_multiple(actions, n)`` and
            ``expected_rewards`` (indexable by action).

    Returns:
        ``max(model.expected_rewards)`` minus the expected reward of
        ``self.best_action``.
    """
    # Floor division keeps the per-action trial count a whole number on both
    # Python 2 and Python 3; plain `/` yields a float on Python 3, which
    # would be passed on as a fractional trial count and used as the
    # denominator below. Any remainder of T is left unused.
    # NOTE(review): if T < model.K this is 0 and the division below is by
    # zero, so self.u is not meaningful -- confirm callers guarantee T >= K.
    trials_per_action = T // model.K

    # Presumably returns one success count per action -- confirm against
    # model.sample_multiple.
    success = model.sample_multiple(range(model.K), trials_per_action)

    # Empirical mean reward of each action.
    self.u = np.true_divide(success, trials_per_action)
    self.best_action = argmax_rand(self.u)

    return max(model.expected_rewards) - model.expected_rewards[self.best_action]
# Comment list
# Table of contents