# linucb.py -- source file (Python code snippet)

def reward(self, history_id, rewards):
        """Reward the previous action with reward.

        For every rewarded action, the per-action model entries are updated
        from the context stored with ``history_id``::

            A[action]     <- A[action] + x x^T
            b[action]     <- b[action] + reward * x
            A_inv[action] <- inv(A[action])
            theta[action] <- A_inv[action] b[action]

        where ``x`` is that action's context reshaped to a column vector.

        All new values are computed before any of them is stored, so an
        error raised part-way through (for example an action id that has no
        stored context) leaves the model dictionaries unmodified, the model
        unsaved and the history unrewarded.

        Parameters
        ----------
        history_id : int
            The history id of the action to reward.

        rewards : dictionary
            The dictionary {action_id: reward}, where reward is a float.
        """
        context = (self._history_storage
                   .get_unrewarded_history(history_id)
                   .context)

        # Fetch the current model
        model = self._model_storage.get_model()
        A = model['A']  # pylint: disable=invalid-name
        A_inv = model['A_inv']  # pylint: disable=invalid-name
        b = model['b']
        theta = model['theta']

        # Stage the updates. New arrays are built instead of updating the
        # stored ones in place, so nothing is modified until every action in
        # ``rewards`` has been processed successfully.
        staged = []
        for action_id, action_reward in rewards.items():
            action_context = np.reshape(context[action_id], (-1, 1))
            a_new = A[action_id] + action_context.dot(action_context.T)
            a_inv_new = np.linalg.inv(a_new)
            b_new = b[action_id] + action_reward * action_context
            theta_new = a_inv_new.dot(b_new)
            staged.append((action_id, a_new, a_inv_new, b_new, theta_new))

        # Commit: plain assignments only from here on.
        for action_id, a_new, a_inv_new, b_new, theta_new in staged:
            A[action_id] = a_new
            A_inv[action_id] = a_inv_new
            b[action_id] = b_new
            theta[action_id] = theta_new

        self._model_storage.save_model({
            'A': A,
            'A_inv': A_inv,
            'b': b,
            'theta': theta,
        })

        # Update the history
        self._history_storage.add_reward(history_id, rewards)