def dist_info_sym(self, obs_var, latent_var=None):
    """Build the symbolic mean and log-std for the given observations.

    The observations are extended with the latent variables (and, when
    ``self.bilinear_integration`` is set, with the flattened element-wise
    products of every observation/latent pair) before being passed through
    the mean and log-std layers.

    NOTE (from the original author): this is meant to be used for one path.
    The original author also flagged that "this is not doing anything" and
    that it does not work for computing the dist_info_vars of
    npo_snn_rewardMI -- presumably referring to the ``latent_var is None``
    fallback below; confirm before relying on it.

    Args:
        obs_var: symbolic observations; indexed below as a 2-D tensor
            (assumed to be (batch, obs_dim) -- TODO confirm).
        latent_var: symbolic latents with one row per observation. When
            ``None``, ``self.latent_fix`` is tiled ``obs_var.shape[0]``
            times and used instead.

    Returns:
        dict with keys ``mean`` and ``log_std`` holding the symbolic outputs
        of ``self._l_mean`` and ``self._l_log_std``.
    """
    if latent_var is None:
        # Avoid requiring the latent as an input: wrap the fixed latent in a
        # shared variable and repeat it once per row of obs_var.
        fixed_latent = theano.shared(np.expand_dims(self.latent_fix, axis=0))
        latent_var = TT.tile(fixed_latent, [obs_var.shape[0], 1])

    # Generalized input: observations followed by latents, optionally
    # followed by the bilinear (pairwise product) features.
    input_parts = [obs_var, latent_var]
    if self.bilinear_integration:
        pairwise = obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :]
        input_parts.append(TT.flatten(pairwise, outdim=2))
    extended_obs_var = TT.concatenate(input_parts, axis=1)

    output_layers = [self._l_mean, self._l_log_std]
    mean_var, log_std_var = L.get_output(output_layers, extended_obs_var)

    # Clamp the log-std from below when a minimum std is configured.
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))

    return dict(mean=mean_var, log_std=log_std_var)
评论列表
文章目录