def __init__(self, max_timesteps, max_episodes, observation_shape, action_shape):
    """Preallocate fixed-size storage for per-step transition data.

    Every per-step array has leading dimensions
    ``(max_timesteps, max_episodes)``; observation and action arrays
    append the respective per-item shape to those two dimensions.

    Args:
        max_timesteps: Size of the first (timestep) axis of each array.
        max_episodes: Size of the second (episode) axis of each array.
        observation_shape: Shape of a single observation. Any sequence
            of ints is accepted and stored as a tuple.
        action_shape: Shape of a single action. Any sequence of ints is
            accepted and stored as a tuple.
    """
    self.max_timesteps = max_timesteps
    self.max_episodes = max_episodes
    # Normalize to tuples so list-style shapes can be concatenated with
    # the leading (timesteps, episodes) dimensions below.
    self.observation_shape = tuple(observation_shape)
    self.action_shape = tuple(action_shape)

    leading = (self.max_timesteps, self.max_episodes)

    # np.empty leaves the contents uninitialized; entries hold arbitrary
    # values until they are explicitly written.
    self.preobs = np.empty(leading + self.observation_shape)
    self.actions = np.empty(leading + self.action_shape)
    self.rewards = np.empty(leading)
    self.postobs = np.empty(leading + self.observation_shape)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which was removed
    # in NumPy 1.24; the resulting dtype is identical.
    self.terminals = np.empty(leading, dtype=bool)

    # One unsigned counter per episode slot, starting at zero.
    self.lengths = np.zeros(self.max_episodes, np.uint)

    # Bookkeeping counters, all starting at zero.
    # NOTE(review): presumably the stored-episode count and the current
    # write position (episode, timestep) -- confirm against the methods
    # that update them.
    self.num_episodes = 0
    self.episode = 0
    self.timestep = 0
# (Web page residue, not part of the code: "Comment list" / "Article table of contents".)