def reward(self, history_id, rewards):
    """Reward the previous action with reward.

    Loads the model from the model storage, adds every reward to the
    per-action reward totals, bumps the per-action and overall counters,
    saves the model back, and finally records the rewards in the history
    storage under ``history_id``.

    Parameters
    ----------
    history_id : int
        The history id of the action to reward.

    rewards : dictionary
        The dictionary {action_id: reward}, where reward is a float.
    """
    # Update the model
    model = self._model_storage.get_model()
    total_action_reward = model['total_action_reward']
    action_times = model['action_times']
    for action_id, action_reward in six.viewitems(rewards):
        total_action_reward[action_id] += action_reward
        action_times[action_id] += 1
        # NOTE(review): the original indentation was lost; n_rounds is
        # assumed to be incremented once per rewarded action so that it
        # advances in step with action_times -- confirm against upstream.
        model['n_rounds'] += 1
    self._model_storage.save_model(model)

    # Update the history
    self._history_storage.add_reward(history_id, rewards)
# Web-page residue, not part of the code: "Comment list" / "Table of contents".