def plot_avg_regret(policy):
    """Plot average regret with respect to time.

    The average reward per time step is obtained from
    ``calculate_avg_reward(policy)`` and the plotted regret at each step
    is ``1 - average reward``.

    Parameters
    ----------
    policy: bandit object
        The bandit algorithm you want to evaluate.

    Notes
    -----
    The ``1 - reward`` conversion presumably assumes rewards are scaled so
    that the best achievable reward is 1 -- TODO confirm against
    ``calculate_avg_reward``.
    """
    avg_reward = calculate_avg_reward(policy)

    # Order the (time, average reward) pairs by time so the line is drawn
    # from left to right.
    points = sorted(six.viewitems(avg_reward), key=lambda item: item[0])

    # zip(*[]) yields nothing, so a bare two-name unpack would raise
    # ValueError when there is no data yet; fall back to empty series and
    # still draw the labelled (empty) axes.
    times, rewards = zip(*points) if points else ((), ())
    regrets = [1 - reward for reward in rewards]

    plt.plot(times, regrets, 'r-', label="average regret")
    plt.xlabel('time')
    plt.ylabel('avg regret')
    plt.legend()
    plt.title("Average Regret with respect to Time")
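# [scraped page navigation, not code] Comment list
# [scraped page navigation, not code] Article table of contents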