def _compute_score(self, context):
    """Compute an upper-confidence-bound score for every action index.

    For each action index ``i`` in ``0 .. len(self.actions) - 1`` the score is

        theta[i]^T . x  +  alpha * sqrt(x^T . a_inv[i] . x)

    where ``x`` is ``context[i]`` reshaped into a column vector, and
    ``theta`` / ``a_inv`` are read from ``self.model['theta']`` and
    ``self.model['act_inv']``.

    Args:
        context: container indexable by action index; each entry is
            reshaped to a column vector before use.

    Returns:
        dict:
            K (int): action index
            V (float): estimated reward plus uncertainty bonus
    """
    a_inv = self.model['act_inv']
    theta = self.model['theta']
    alpha = self.alpha

    score_dict = {}
    # `range` (not `xrange`) so the method runs on Python 3 as well as 2.
    for action_id in range(len(self.actions)):
        action_context = np.reshape(context[action_id], (-1, 1))

        # Both products collapse to a single element; pull it out with
        # .item() rather than float(ndarray), which NumPy deprecates for
        # arrays with ndim > 0.
        estimated_reward = float(
            np.asarray(theta[action_id].T.dot(action_context)).item()
        )
        quad_form = np.asarray(
            action_context.T.dot(a_inv[action_id]).dot(action_context)
        ).item()
        uncertainty = float(alpha * np.sqrt(quad_form))

        score_dict[action_id] = estimated_reward + uncertainty
    return score_dict
评论列表
文章目录