def run(self, T, model):
    """Estimate per-action rewards from ``T`` samples and return the regret.

    Each iteration calls ``model.sample(model.K - 1)`` (the last action
    index) and folds the returned ``(x, y)`` pair into running per-action
    trial and success counts.  After the loop the empirical means are
    stored in ``self.u``, the chosen arm in ``self.best_action``, and the
    gap between the best expected reward and the chosen arm's expected
    reward is returned.

    Args:
        T: Number of samples to draw.
        model: Object exposing ``K``, ``sample(action)`` and
            ``expected_rewards``.
            NOTE(review): presumably ``x`` holds N binary variables with
            ``model.K == 2 * N + 1`` -- confirm against the model class.

    Returns:
        ``max(model.expected_rewards)`` minus the expected reward of
        ``self.best_action``.

    Side effects:
        Overwrites ``self.trials``, ``self.success``, ``self.u`` and
        ``self.best_action``.
    """
    self.trials = np.zeros(model.K)
    self.success = np.zeros(model.K)

    # ``range`` instead of the Python-2-only ``xrange`` so the method also
    # runs on Python 3; the loop index itself is never used.
    for _ in range(T):
        x, y = model.sample(model.K - 1)
        # first N actions represent x_i = 0, 2nd N x_i = 1, last do()
        xij = np.hstack((1 - x, x, 1))
        self.trials += xij
        self.success += y * xij

    # NOTE(review): an action with zero trials (e.g. every action when
    # T == 0) gives 0/0 here, which NumPy evaluates to NaN with a
    # RuntimeWarning.  How ``argmax_rand`` treats NaN is not visible from
    # this block -- confirm before relying on small T.
    self.u = np.true_divide(self.success, self.trials)
    self.best_action = argmax_rand(self.u)
    return max(model.expected_rewards) - model.expected_rewards[self.best_action]
评论列表
文章目录