def _exp3_probs(self):
    """Compute the Exp3 selection probability of every stored action.

    Each action's probability is the sum of two terms: the action's share
    of the total weight scaled by ``1 - self.gamma``, and a uniform share
    ``self.gamma / n_actions`` where ``n_actions`` is the count reported
    by the action storage.

    Returns
    -------
    dict
        Maps each id yielded by ``self._action_storage.iterids()`` to its
        computed probability.
    """
    weights = self._model_storage.get_model()['w']
    total_weight = sum(six.viewvalues(weights))
    num_actions = self._action_storage.count()
    # Both terms are evaluated per action (not hoisted) so that an empty
    # action set performs no division at all.
    return {
        action_id: ((1 - self.gamma) * weights[action_id] / total_weight
                    + self.gamma / num_actions)
        for action_id in self._action_storage.iterids()
    }
评论列表
文章目录