def reward(self, history_id, rewards):
    """Fold observed rewards into the per-action model and record them.

    For every rewarded action, the action's context vector is added (as an
    outer product) to ``A``, ``A_inv`` is recomputed from the new ``A``,
    the reward-weighted context is added to ``b`` and ``theta`` is
    refreshed as ``A_inv . b``. The model is then saved and the rewards
    are attached to the history entry.

    Parameters
    ----------
    history_id : int
        The history id of the action to reward.
    rewards : dictionary
        The dictionary {action_id, reward}, where reward is a float.
    """
    # Look up the still-unrewarded history entry; its context is indexed
    # by action id below.
    history = self._history_storage.get_unrewarded_history(history_id)
    context = history.context

    # Fetch the current per-action parameters from model storage.
    model = self._model_storage.get_model()
    A, A_inv = model['A'], model['A_inv']  # pylint: disable=invalid-name
    b, theta = model['b'], model['theta']

    for action_id, reward_value in six.viewitems(rewards):
        # Column vector holding this action's context features.
        features = np.reshape(context[action_id], (-1, 1))

        # In-place accumulation, then recompute the derived quantities.
        A[action_id] += features.dot(features.T)
        A_inv[action_id] = np.linalg.inv(A[action_id])
        b[action_id] += reward_value * features
        theta[action_id] = A_inv[action_id].dot(b[action_id])

    # Persist the updated parameters.
    updated_model = {
        'A': A,
        'A_inv': A_inv,
        'b': b,
        'theta': theta,
    }
    self._model_storage.save_model(updated_model)

    # Record the rewards on the history entry.
    self._history_storage.add_reward(history_id, rewards)
# (Web-page residue, not part of the code: "comment list" / "article table of contents".)