# latent_regressor.py -- Python source code snippet

def lowb_mutual(self, paths, times=(0, None)):
    """Return the latent entropy plus the mean regressor log-likelihood.

    Builds the regressor input from the selected observation and action
    features of every path, optionally perturbs it with Gaussian noise
    and/or reduces it to its sign, and returns

        H(latent) + mean(log-likelihood of the latents given the features)

    NOTE(review): judging by the name, this is meant as a lower bound on the
    mutual information between latents and trajectories -- confirm against
    the callers.

    Args:
        paths: iterable of rollout dicts. Each one is indexed as
            ``p["observations"][t0:t1, cols]``, ``p["actions"][t0:t1, cols]``
            and ``p["agent_infos"]["latents"][t0:t1]``.
        times: ``(start, stop)`` bounds of the time slice applied to every
            path; the default keeps each path whole.

    Returns:
        ``self.policy.latent_dist.entropy(self.policy.latent_dist_info)``
        plus the mean of ``self._regressor.predict_log_likelihood(...)``.
    """
    window = slice(times[0], times[1])
    obs_chunks = [p["observations"][window, self.obs_regressed] for p in paths]
    act_chunks = [p["actions"][window, self.act_regressed] for p in paths]
    latent_chunks = [p["agent_infos"]["latents"][window] for p in paths]

    if self.recurrent:
        # Keep one leading entry per path: (path, time, feature).
        # Assumes all sliced paths share the same length -- TODO confirm.
        observations = np.array(obs_chunks)
        actions = np.array(act_chunks)
        obs_actions = np.concatenate([observations, actions], axis=2)
        latents = np.array(latent_chunks)
    else:
        # Flatten all paths into one long (sample, feature) matrix.
        observations = np.concatenate(obs_chunks)
        actions = np.concatenate(act_chunks)
        obs_actions = np.concatenate([observations, actions], axis=1)
        latents = np.concatenate(latent_chunks)

    if self.noisify_traj_coef:
        # multivariate_normal needs a 1-D mean and a square covariance, so
        # the statistics are computed on a (sample, feature) view. For the
        # non-recurrent layout this view is the array itself; for the
        # recurrent layout it merges the path and time axes, which the
        # previous code did not do and therefore raised ValueError.
        n_features = obs_actions.shape[-1]
        flat = obs_actions.reshape(-1, n_features)
        # Diagonal covariance: per-feature variance is the mean absolute
        # value of that feature scaled by noisify_traj_coef.
        variances = np.mean(np.abs(flat), axis=0) * self.noisify_traj_coef
        noise = np.random.multivariate_normal(mean=np.zeros(n_features),
                                              cov=np.diag(variances),
                                              size=flat.shape[0])
        # Out-of-place add so integer-typed features are promoted to float
        # instead of failing an in-place cast.
        obs_actions = obs_actions + noise.reshape(obs_actions.shape)

    if self.use_only_sign:
        obs_actions = np.sign(obs_actions)

    # Entropy of the policy's latent distribution, as reported by the
    # distribution object for the policy's own distribution parameters.
    H_latent = self.policy.latent_dist.entropy(self.policy.latent_dist_info)

    return H_latent + np.mean(self._regressor.predict_log_likelihood(obs_actions, latents))