import numpy as np

def get_action(self, observation):
    if self._state_include_action:
        # Feed the previous action back in as part of the policy input.
        if self._prev_action is None:
            prev_action = np.zeros((self.action_space.flat_dim,))
        else:
            prev_action = self.action_space.flatten(self._prev_action)
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action,
        ])
    else:
        all_input = self.observation_space.flatten(observation)
        # Placeholder only; never read when _state_include_action is False.
        prev_action = np.nan
    # One recurrent step on a batch of size one: returns the Gaussian
    # parameters and the new hidden state, so take element [0] of each.
    mean, log_std, hidden_vec = [
        x[0] for x in self._f_step_mean_std([all_input], [self._prev_hidden])
    ]
    # Sample from N(mean, exp(log_std)^2) via the reparameterization
    # action = mean + eps * std, with eps ~ N(0, I).
    rnd = np.random.normal(size=mean.shape)
    action = rnd * np.exp(log_std) + mean
    # Carry the sampled action and hidden state over to the next step.
    self._prev_action = action
    self._prev_hidden = hidden_vec
    agent_info = dict(mean=mean, log_std=log_std)
    if self._state_include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
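For context, here is a minimal sketch of how such a recurrent stochastic policy is typically driven during a rollout. The `env` and `policy` objects, the gym-style `env.step` return signature, and the `policy.reset()` call (assumed to clear `_prev_action` and `_prev_hidden` before each episode) are assumptions for illustration, not part of the excerpt above:

# Hypothetical rollout loop around get_action(); `env` and `policy`
# are assumed objects, and policy.reset() is assumed to clear the
# recurrent state (_prev_action, _prev_hidden) at episode start.
observation = env.reset()
policy.reset()
done = False
total_reward = 0.0
while not done:
    action, agent_info = policy.get_action(observation)
    # agent_info carries the Gaussian parameters used for the sample.
    observation, reward, done, _ = env.step(action)
    total_reward += reward
print(total_reward)

Because the hidden state lives on the policy object, forgetting the `reset()` between episodes would leak recurrent state from one trajectory into the next.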