def _linthompsamp_score(self, context):
"""Thompson Sampling"""
action_ids = list(six.viewkeys(context))
context_array = np.asarray([context[action_id]
for action_id in action_ids])
model = self._model_storage.get_model()
B = model['B'] # pylint: disable=invalid-name
mu_hat = model['mu_hat']
v = self.R * np.sqrt(24 / self.epsilon
* self.context_dimension
* np.log(1 / self.delta))
mu_tilde = self.random_state.multivariate_normal(
mu_hat.flat, v**2 * np.linalg.inv(B))[..., np.newaxis]
estimated_reward_array = context_array.dot(mu_hat)
score_array = context_array.dot(mu_tilde)
estimated_reward_dict = {}
uncertainty_dict = {}
score_dict = {}
for action_id, estimated_reward, score in zip(
action_ids, estimated_reward_array, score_array):
estimated_reward_dict[action_id] = float(estimated_reward)
score_dict[action_id] = float(score)
uncertainty_dict[action_id] = float(score - estimated_reward)
return estimated_reward_dict, uncertainty_dict, score_dict
评论列表
文章目录