def _step(self, action_n):
    """Step the wrapped env and annotate ``info`` with episode bookkeeping.

    Args:
        action_n: Actions forwarded unchanged to ``self.env.step``.

    Returns:
        The ``(observation_n, reward_n, done_n, info)`` tuple produced by
        ``self.env.step``. ``info`` is modified in place:

        * every entry of ``info['n']`` gets ``'vectorized.episode_id'`` set
          to the id stored in ``self.episode_ids`` at the same index, as it
          was *before* any id is bumped by this step;
        * ``info['vectorized.done_to']`` is set to ``self.done_to``.

    Side effects:
        For each index flagged in ``done_n``, the current episode id is added
        to ``self.extra_done`` and ``self.episode_ids`` at that index grows by
        ``self.n``. If ``self.episode_limit`` is set and the new id is above
        it, ``self.env.mask`` is called for that index.
    """
    observation_n, reward_n, done_n, info = self.env.step(action_n)

    # Tag each per-env info dict with the id of the episode it belongs to.
    # This happens before the ids are bumped, so an episode that finished on
    # this step is still reported under its own id.
    per_env_infos = info['n']
    for slot, slot_info in enumerate(per_env_infos):
        slot_info['vectorized.episode_id'] = self.episode_ids[slot]

    # Flat array of the indices whose done flag is truthy.
    finished_slots = np.argwhere(done_n).reshape(-1)
    if finished_slots.size:
        for slot in finished_slots:
            finished_id = self.episode_ids[slot]
            self.extra_done.add(finished_id)

            # The slot moves on to its next episode, n ids further along.
            next_id = finished_id + self.n
            self.episode_ids[slot] = next_id

            limit = self.episode_limit
            if limit is None:
                continue
            if next_id > limit:
                logger.debug('Masking: index=%s episode_id=%s', slot, next_id)
                self.env.mask(slot)

        # NOTE(review): _set_done_to is defined outside this block; presumably
        # it advances self.done_to using self.extra_done -- confirm.
        self._set_done_to()

    # Pass along the number of contiguous episodes that are now done
    info['vectorized.done_to'] = self.done_to
    return observation_n, reward_n, done_n, info
评论列表
文章目录