def run_test_episode(env, policy, episode_len=np.inf, render=False):
    """
    Roll out a single episode of ``policy`` in ``env`` and return the total reward.

    The environment is reset once and then stepped until it reports ``done``
    or ``episode_len`` steps have been taken, whichever happens first. Every
    observation is passed through ``apply_prediction_preprocessors`` before
    ``policy.predict`` is called, and every predicted action is passed through
    ``apply_prediction_postprocessors`` before it is handed to ``env.step``.

    :param env: environment providing ``reset()``, ``render()`` and
        ``step(action)``; the result of ``step`` is unpacked as a 4-tuple
        whose last element is ignored
    :param policy: object providing ``predict(obs)``
    :param episode_len: upper bound on the number of steps (``np.inf`` by default)
    :param render: when true, ``env.render()`` is called before each step
    :return: the accumulated reward, starting from ``0.0``
    """
    observation = env.reset()
    episode_return = 0.0
    steps_taken = 0
    finished = False

    while not finished and steps_taken < episode_len:
        if render:
            env.render()

        # Observation -> policy input -> raw prediction -> environment action.
        policy_input = apply_prediction_preprocessors(policy, observation)
        env_action = apply_prediction_postprocessors(
            policy, policy.predict(policy_input)
        )

        observation, step_reward, finished, _ = env.step(env_action)
        episode_return += step_reward
        steps_taken += 1

    return episode_return
# Web-page residue, not part of the code: "Comment list" / "Article table of contents"