def calculate_cum_reward(policy):
    """Calculate cumulative reward with respect to time.

    Parameters
    ----------
    policy: bandit object
        The bandit algorithm you want to evaluate. It must expose a
        ``history_storage`` attribute providing ``n_histories`` and
        ``get_history(history_id)``; each returned history must have a
        ``rewards`` mapping whose values can be summed.

    Returns
    -------
    cum_reward: dict
        The dict stores {history_id: cumulative reward}.
    cum_n_actions: dict
        The dict stores
        {history_id: cumulative number of recommended actions}.

    Notes
    -----
    Both dicts also contain a seed entry under key ``-1`` (``0.0`` and ``0``
    respectively). It is the starting value of the running totals and is
    kept in the returned dicts.
    """
    storage = policy.history_storage
    # Seed the running totals at index -1 so the first iteration (i == 0)
    # can look up "the previous total" without a special case.
    cum_reward = {-1: 0.0}
    cum_n_actions = {-1: 0}
    for i in range(storage.n_histories):
        reward = storage.get_history(i).rewards
        # One entry in ``rewards`` is counted as one action.
        cum_n_actions[i] = cum_n_actions[i - 1] + len(reward)
        # ``.values()`` is summable on both Python 2 and 3, so no
        # compatibility shim is needed here.
        cum_reward[i] = cum_reward[i - 1] + sum(reward.values())
    return cum_reward, cum_n_actions
评论列表
文章目录