def run_single_softmax_experiment(beta, alpha, trials=360):
    """Run one contextual bandit experiment with a softmax-rule agent.

    Creates a fresh ``ContextualBandit`` and a ``ContextualAgent`` bound to
    it, calls the agent's ``run()`` once per trial, and returns the agent's
    accumulated log as a table.

    Args:
        beta: Forwarded to ``ContextualAgent`` as ``beta`` (presumably the
            softmax inverse temperature -- confirm against the agent class).
        alpha: Forwarded to ``ContextualAgent`` as ``alpha`` (presumably the
            learning rate -- confirm against the agent class).
        trials: Number of times ``run()`` is called on the agent. Defaults
            to 360, the value that used to be hard-coded here.

    Returns:
        A ``DataFrame`` built from ``ca.log`` with the columns ``context``,
        ``action``, ``reward``, ``Q(c,23)``, ``Q(c,14)``, ``Q(c,8)`` and
        ``Q(c,3)``. This assumes every log entry has exactly seven fields
        in that order -- verify against ``ContextualAgent.run``.
    """
    print('Running a contextual bandit experiment')
    cb = ContextualBandit()
    ca = ContextualAgent(cb, beta=beta, alpha=alpha)
    for _ in range(trials):
        ca.run()
    # Column labels must line up one-to-one with the fields of each log entry.
    columns = ('context', 'action', 'reward',
               'Q(c,23)', 'Q(c,14)', 'Q(c,8)', 'Q(c,3)')
    return DataFrame(ca.log, columns=columns)
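# Page-navigation residue from the scraped web page (not part of the code):
# "Comment list" / "Table of contents"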