def _step(self, action):
    """Advance the wrapped environment one step and post-process the result.

    The raw observation is passed through ``self.process_observation``. The
    reward is either squashed to its sign (when ``self.squash_rewards`` is
    truthy) or divided by ``self.reward_scale``. The life counter found under
    ``info["ale.lives"]`` is mirrored to ``info["frame/lives"]`` and remembered
    in ``self.lives``; every life lost since the previous call subtracts
    ``self.death_penalty`` from the reward.

    Args:
        action: Forwarded unchanged to ``self.env.step``.

    Returns:
        A tuple ``(obs, reward, done, info)``: the processed observation, the
        adjusted reward, and ``done`` / ``info`` as returned by the wrapped
        environment (``info`` additionally carries ``"frame/lives"``).

    Raises:
        KeyError: Presumably, if ``info`` is a plain dict that lacks the
            ``"ale.lives"`` key -- confirm against the wrapped environment.
    """
    obs, reward, done, info = self.env.step(action)
    obs = self.process_observation(obs)

    if self.squash_rewards:
        reward = float(np.sign(reward))
    else:
        reward = float(reward) / float(self.reward_scale)

    current_lives = info["ale.lives"]
    info["frame/lives"] = current_lives

    if self.lives is None:
        # First step seen by this wrapper: nothing to compare against yet.
        self.lives = current_lives
    else:
        lost = self.lives - current_lives
        self.lives = current_lives
        if lost > 0:
            # The penalty is applied after squashing/scaling, so it is not
            # itself clipped or rescaled.
            reward -= lost * self.death_penalty

    return obs, reward, done, info
# (Web-page residue, not part of the code: "Comment list" / "Article contents".)