linthompsamp.py 文件源码-python代码片段

linthompsamp.py 文件源码

python

阅读 26 收藏 0 点赞 0 评论 0

def reward(self, history_id, rewards):
        """Reward the previous action with reward.

        Parameters
        ----------
        history_id : int
            The history id of the action to reward.

        rewards : dictionary
            The dictionary {action_id, reward}, where reward is a float.
        """
        context = (self._history_storage
                   .get_unrewarded_history(history_id)
                   .context)

        # Update the model
        model = self._model_storage.get_model()
        B = model['B']  # pylint: disable=invalid-name
        f = model['f']

        for action_id, reward in six.viewitems(rewards):
            context_t = np.reshape(context[action_id], (-1, 1))
            B += context_t.dot(context_t.T)  # pylint: disable=invalid-name
            f += reward * context_t
            mu_hat = np.linalg.inv(B).dot(f)
        self._model_storage.save_model({'B': B, 'mu_hat': mu_hat, 'f': f})

        # Update the history
        self._history_storage.add_reward(history_id, rewards)