def reward(self, history_id, rewards):
    """Reward the previous action with reward.

    Folds the context vector of every rewarded action into the model
    accumulators ``B`` and ``f``, recomputes ``mu_hat`` as the solution of
    ``B . mu_hat = f``, saves the model, and finally records the rewards
    in the history storage.

    Parameters
    ----------
    history_id : int
        The history id of the action to reward.

    rewards : dictionary
        The dictionary {action_id, reward}, where reward is a float.
        May be empty; ``B`` and ``f`` are then left as they are and
        ``mu_hat`` is recomputed from them.

    Raises
    ------
    KeyError
        If an action id in ``rewards`` has no entry in the stored context.
        Raised before ``B`` or ``f`` is modified.
    numpy.linalg.LinAlgError
        If ``B`` is singular.
    """
    context = (self._history_storage
               .get_unrewarded_history(history_id)
               .context)

    # Update the model
    model = self._model_storage.get_model()
    B = model['B']  # pylint: disable=invalid-name
    f = model['f']

    # Resolve every context vector before touching B or f: both are
    # updated in place below, so failing on an unknown action id halfway
    # through would leave the arrays partially updated.
    updates = [(np.reshape(context[action_id], (-1, 1)), reward)
               for action_id, reward in six.viewitems(rewards)]

    for context_t, reward in updates:
        B += context_t.dot(context_t.T)  # pylint: disable=invalid-name
        f += reward * context_t

    # Only the final mu_hat is saved, so compute it once after all updates.
    # Solving the linear system avoids forming the explicit inverse of B.
    mu_hat = np.linalg.solve(B, f)
    self._model_storage.save_model({'B': B, 'mu_hat': mu_hat, 'f': f})

    # Update the history
    self._history_storage.add_reward(history_id, rewards)
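# Comment list (appears to be leftover web-page text; not part of the code)
# Table of contents (appears to be leftover web-page text; not part of the code)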