def lowb_mutual(self, paths, times=(0, None)):
    """Return the latent entropy plus the mean regressor log-likelihood.

    Builds (observation, action) feature rows from ``paths``, optionally
    perturbs them with Gaussian noise and/or reduces them to their sign,
    and scores the recorded latents against those features with
    ``self._regressor.predict_log_likelihood``.  The mean of the scores is
    added to the entropy reported by ``self.policy.latent_dist``.

    NOTE(review): the method name suggests the result is meant as a lower
    bound on a mutual-information term -- confirm against the callers.

    Args:
        paths: sequence of dicts (iterated more than once).  Each dict
            provides ``"observations"`` and ``"actions"`` arrays indexable
            as ``[time_slice, columns]`` and ``["agent_infos"]["latents"]``
            indexable by a time slice.
        times: ``(start, stop)`` bounds of the time slice applied to every
            path; the default ``(0, None)`` keeps every timestep.

    Returns:
        ``entropy + mean(log_likelihood)``, with whatever type the policy's
        entropy and the regressor's log-likelihood produce.
    """
    window = slice(times[0], times[1])
    obs_per_path = [p["observations"][window, self.obs_regressed] for p in paths]
    act_per_path = [p["actions"][window, self.act_regressed] for p in paths]
    latents_per_path = [p['agent_infos']['latents'][window] for p in paths]

    if self.recurrent:
        # Stack paths along a new leading axis.  Presumably every path must
        # have the same length after slicing -- this is not checked here.
        observations = np.array(obs_per_path)
        actions = np.array(act_per_path)
        obs_actions = np.concatenate([observations, actions], axis=2)
        latents = np.array(latents_per_path)
    else:
        # Join all paths end to end into one long batch of timesteps.
        observations = np.concatenate(obs_per_path)
        actions = np.concatenate(act_per_path)
        obs_actions = np.concatenate([observations, actions], axis=1)
        latents = np.concatenate(latents_per_path)

    if self.noisify_traj_coef:
        # Independent zero-mean Gaussian noise per feature, with variance
        # mean(|feature|) * noisify_traj_coef.  multivariate_normal only
        # accepts a 1-D mean, so the leading axes are collapsed for the draw
        # and the noise is reshaped back; for the 2-D (non-recurrent) input
        # this is exactly the same call as operating on obs_actions directly.
        flat = obs_actions.reshape(-1, obs_actions.shape[-1])
        variances = np.mean(np.abs(flat), axis=0) * self.noisify_traj_coef
        noise = np.random.multivariate_normal(mean=np.zeros_like(variances),
                                              cov=np.diag(variances),
                                              size=flat.shape[0])
        # obs_actions is a fresh array, so the in-place add never touches
        # the arrays stored in ``paths``.
        obs_actions += noise.reshape(obs_actions.shape)

    if self.use_only_sign:
        obs_actions = np.sign(obs_actions)

    # Entropy of the policy's latent distribution (presumably summed over
    # the latent dimensions -- TODO confirm with latent_dist.entropy).
    H_latent = self.policy.latent_dist.entropy(self.policy.latent_dist_info)
    return H_latent + np.mean(self._regressor.predict_log_likelihood(obs_actions, latents))
# 评论列表 (comment list)
# 文章目录 (article table of contents)