ucb1.py 文件源码-python代码片段

ucb1.py 文件源码

python

阅读 27 收藏 0 点赞 0 评论 0

def reward(self, history_id, rewards):
        """Fold observed rewards into the stored model and the history.

        The model is loaded from the model storage, updated in place for
        every rewarded action, and written back; afterwards the same
        rewards are attached to the given history entry.

        Parameters
        ----------
        history_id : int
            The history id of the action(s) being rewarded.

        rewards : dictionary
            Mapping {action_id: reward}, where reward is a float.
        """

        # --- model update ---
        bandit_state = self._model_storage.get_model()
        reward_sums = bandit_state['total_action_reward']
        pull_counts = bandit_state['action_times']

        for action_id, gain in six.viewitems(rewards):
            # Accumulate the payoff first, then count the pull; the round
            # counter advances once per rewarded action.
            reward_sums[action_id] += gain
            pull_counts[action_id] += 1
            bandit_state['n_rounds'] += 1

        self._model_storage.save_model(bandit_state)

        # --- history update ---
        self._history_storage.add_reward(history_id, rewards)