def rollout(self, doing_eval=False):
    """Run one episode and return its observations, actions and rewards.

    Resets ``self.env``, then repeatedly asks ``self.sample_action_given``
    for an action and steps the environment with it until the environment
    reports that the episode is done.

    Args:
        doing_eval: Passed through unchanged to ``self.sample_action_given``.

    Returns:
        A tuple ``(observations, actions, rewards)`` of three lists with one
        entry per step taken. ``observations[i]`` is the observation that
        ``actions[i]`` was sampled from, so the last observation returned by
        the environment is not included.

    Raises:
        AssertionError: If the sampled action equals 5.
    """
    observations, actions, rewards = [], [], []
    observation = self.env.reset()
    done = False
    while not done:
        # Record the observation the action is chosen from, before stepping.
        observations.append(observation)
        action = self.sample_action_given(observation, doing_eval)
        # NOTE(review): 5 is presumably outside the valid action range and
        # indicates a sampling problem -- confirm against the action space.
        assert action != 5, "FAIL! (multinomial logits sampling bug?"
        # step() is unpacked as a 4-tuple; its fourth element is ignored.
        observation, reward, done, _ = self.env.step(action)
        actions.append(action)
        rewards.append(reward)
    if VERBOSE_DEBUG:
        # Single-argument print() call works on both Python 2 and Python 3.
        print("rollout: actions=%s" % (actions,))
    return observations, actions, rewards
评论列表
文章目录