def get_output_p(self, path):
    """Return the regressor's per-step latent posterior for one path.

    The regressor input is built by concatenating, along axis 1, the
    observation columns selected by ``self.obs_regressed`` with the action
    columns selected by ``self.act_regressed``.

    Args:
        path: Object indexable by ``"observations"`` and ``"actions"``; each
            entry must support ``[:, columns]`` indexing.

    Returns:
        The flattened output of ``self._regressor._f_p`` when
        ``self.policy.latent_name`` is ``'bernoulli'``, or of
        ``self._regressor._f_pdists`` when it is ``'normal'``. ``None`` for
        any other latent name.
    """
    # The per-step features are the same in both modes; only the wrapping
    # differs, so build them once.
    obs_actions = np.concatenate([path["observations"][:, self.obs_regressed],
                                  path["actions"][:, self.act_regressed]],
                                 axis=1)

    if self.noisify_traj_coef:
        # Add the noise while obs_actions is still an ndarray. Doing `+=` on
        # the list used in recurrent mode would *extend* the list with the
        # noise array instead of adding it element-wise.
        noise_scale = float(np.mean(np.abs(obs_actions))) * self.noisify_traj_coef
        # np.concatenate returned a fresh array, so the in-place add cannot
        # modify the arrays stored in `path`.
        obs_actions += np.random.normal(loc=0.0, scale=noise_scale,
                                        size=np.shape(obs_actions))

    if self.recurrent:
        # Recurrent mode hands the regressor a one-element list holding the
        # whole path.
        # NOTE(review): the original author questioned whether this wrapping
        # is equivalent to the non-recurrent input ("is this the same??").
        obs_actions = [obs_actions]

    if self.use_only_sign:
        obs_actions = np.sign(obs_actions)

    latent_name = self.policy.latent_name
    if latent_name == 'bernoulli':
        return self._regressor._f_p(obs_actions).flatten()
    if latent_name == 'normal':
        return self._regressor._f_pdists(obs_actions).flatten()
    # Unsupported latent names yield None, as before.
    return None
# (Web-page navigation residue, not part of the code: "Comment list" / "Article table of contents".)