def eval_performance(rom, model, deterministic=False, use_sdl=False,
                     record_screen_dir=None):
    """Run one episode on ``rom`` with ``model`` and return the total reward.

    A fresh ``ale.ALE`` environment is created with
    ``treat_life_lost_as_terminal=False`` and stepped until
    ``env.is_terminal`` becomes true. The value returned by each
    ``receive_action`` call is accumulated into the result.

    Args:
        rom: Passed through as the first argument of ``ale.ALE``.
        model: Object providing ``reset_state()``, ``pi_and_v(state)`` and
            ``unchain_backward()``. Element 0 of the ``pi_and_v`` result is
            used as the policy output.
        deterministic: When true, the action is taken from
            ``most_probable_actions[0]`` of the policy output; otherwise
            from ``action_indices[0]`` (presumably a sampled action --
            confirm against the policy output class).
        use_sdl: Forwarded to ``ale.ALE``.
        record_screen_dir: Forwarded to ``ale.ALE``.

    Returns:
        The sum of the values returned by ``receive_action`` over the
        episode (``0`` if the environment starts out terminal).
    """
    environment = ale.ALE(
        rom,
        treat_life_lost_as_terminal=False,
        use_sdl=use_sdl,
        record_screen_dir=record_screen_dir,
    )
    model.reset_state()

    total_reward = 0
    while True:
        if environment.is_terminal:
            break

        # Add a leading axis of size 1 to the preprocessed state before
        # wrapping it for the model.
        observation = np.expand_dims(dqn_phi(environment.state), 0)
        policy_out = model.pi_and_v(chainer.Variable(observation))[0]

        # Called after every forward pass, as in the original loop; only
        # evaluation happens here, no backward pass is run in this function.
        model.unchain_backward()

        candidates = (policy_out.most_probable_actions if deterministic
                      else policy_out.action_indices)
        total_reward += environment.receive_action(candidates[0])

    return total_reward
# Comment list
# Article table of contents