def eval_single_run(env, model, phi, deterministic=False):
    """Play one episode of ``env`` with ``model`` and return the total reward.

    Args:
        env: Environment exposing ``reset()``, whose return value is used as
            the first observation, and ``step(action)``, which returns a
            4-tuple ``(obs, reward, done, info)``.
        model: Object exposing ``reset_state()``, ``pi_and_v(state)`` and
            ``unchain_backward()``.
        phi: Callable applied to each observation before it is wrapped in a
            ``chainer.Variable`` and passed to the model.
        deterministic: If true, the action is taken from
            ``most_probable_actions`` of the policy output; otherwise from
            ``action_indices``.

    Returns:
        The sum of the rewards returned by ``env.step`` until it reports
        ``done``.
    """
    model.reset_state()
    total_reward = 0
    obs = env.reset()
    done = False
    while not done:
        # Add a leading axis of length 1 so the model is fed a batch of one.
        state = chainer.Variable(np.expand_dims(phi(obs), 0))
        # Only the first item returned by pi_and_v is needed to pick an action.
        policy_out = model.pi_and_v(state)[0]
        # NOTE(review): presumably cuts the backward graph held by the model's
        # state so it does not grow over the episode -- confirm against the
        # model implementation.
        model.unchain_backward()
        if deterministic:
            action = policy_out.most_probable_actions[0]
        else:
            action = policy_out.action_indices[0]
        # The fourth element returned by step() is not used here.
        obs, reward, done, _info = env.step(action)
        total_reward += reward
    return total_reward
评论列表
文章目录