def _exp4p_score(self, context):
    """Compute the Exp4.P action probabilities from the advisors' advice.

    Each advisor's advice is weighted by its stored weight ``w``, the
    weighted advice is mixed with the minimum probability ``self.p_min``
    and the result is normalized to sum to one. The probabilities and
    the (possibly extended) weights are saved back to the model storage.

    Parameters
    ----------
    context : dict
        Maps each advisor id to a mapping from action id to the value
        that advisor gives the action (presumably a probability --
        confirm against the caller).

    Returns
    -------
    estimated_reward : dict
        Maps each id in ``self.action_ids`` to its normalized probability.
    uncertainty : dict
        Maps each id in ``self.action_ids`` to 0.
    score : dict
        Same contents as ``estimated_reward``, as a separate dict.
    """
    advisor_ids = list(context)

    w = self._modelstorage.get_model()['w']
    # Every advisor without a stored weight starts at 1. Doing this per
    # advisor (not only when ``w`` is completely empty) keeps an advisor
    # that shows up later from raising KeyError in the lookup below.
    for advisor_id in advisor_ids:
        if advisor_id not in w:
            w[advisor_id] = 1
    # NOTE: the normalizer covers every stored weight, including advisors
    # that are absent from this ``context``.
    w_sum = sum(w.values())

    # Probability mass left for the advisors once each action has been
    # granted its ``p_min`` floor.
    advice_share = 1 - self.n_actions * self.p_min

    action_probs = []
    for action_id in self.action_ids:
        weighted_advice = [w[advisor_id] * context[advisor_id][action_id]
                           for advisor_id in advisor_ids]
        mixture = np.sum(weighted_advice) / w_sum
        action_probs.append(advice_share * mixture + self.p_min)
    action_probs = np.asarray(action_probs)
    action_probs = action_probs / action_probs.sum()

    estimated_reward = dict(zip(self.action_ids, action_probs))
    uncertainty = {action_id: 0 for action_id in self.action_ids}
    score = dict(estimated_reward)

    self._modelstorage.save_model(
        {'action_probs': estimated_reward, 'w': w})

    return estimated_reward, uncertainty, score
# (web page residue) Comment list
# (web page residue) Table of contents