rewardplot.py 文件源码-python代码片段

rewardplot.py 文件源码

python

阅读 31 收藏 0 点赞 0 评论 0

def calculate_cum_reward(policy):

    """Calculate cumulative reward with respect to time.

        Parameters
        ----------
        policy: bandit object
            The bandit algorithm you want to evaluate.

        Return
        ---------
        cum_reward: dict
            The dict stores {history_id: cumulative reward} .

        cum_n_actions: dict
            The dict stores
            {history_id: cumulative number of recommended actions}.
    """
    cum_reward = {-1: 0.0}
    cum_n_actions = {-1: 0}
    for i in range(policy.history_storage.n_histories):
        reward = policy.history_storage.get_history(i).rewards
        cum_n_actions[i] = cum_n_actions[i - 1] + len(reward)
        cum_reward[i] = cum_reward[i - 1] + sum(six.viewvalues(reward))
    return cum_reward, cum_n_actions