def step(self, action):
    """Apply ``action`` for up to four environment steps and stack the frames.

    Before every environment step the current screen is captured with
    ``self.get_screen()``. The loop stops early as soon as the episode ends
    or a non-zero cumulative reward has been collected; in that case the
    last captured screen is repeated so that four frames are always
    returned.

    When the episode ends, the environment is reset and stepped 20 times
    with raw action ``0`` before returning (presumably a no-op used to skip
    the start of the next episode -- confirm against the environment).

    Args:
        action: Key into ``self.action_mapping``; the mapped value is what
            gets passed to ``self.env.step``.

    Returns:
        A tuple ``(screens, total_reward, done, info)`` where ``screens`` is
        a float64 array stacking the four frames along the first axis,
        ``total_reward`` is the sum of rewards over the executed steps, and
        ``done`` / ``info`` come from the last executed environment step.
    """
    n_frames = 4
    steps_after_reset = 20

    env_action = self.action_mapping[action]
    screens = []
    total_reward = 0
    for t in range(n_frames):
        # The frame is grabbed *before* the action is applied.
        screen = self.get_screen()
        screens.append(screen)
        _, reward, done, info = self.env.step(env_action)
        total_reward += reward
        if done or total_reward != 0:
            if done:
                self.env.reset()
                for _ in range(steps_after_reset):
                    self.env.step(0)
            # Pad with the last captured frame to keep the stack size fixed.
            screens.extend([screen] * (n_frames - 1 - t))
            break
    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64, so this keeps the original dtype.
    screens = np.asarray(screens, dtype=np.float64)
    return screens, total_reward, done, info
评论列表
文章目录