def _make_actiondist_ops(self, obsfeat_B_Df):
    """Assemble the Gaussian action-distribution parameters for a batch.

    The means are produced by a feedforward net (built under the 'hidden'
    variable scope) followed by an affine layer (built under the 'out'
    scope) that is handed an all-zeros initializer array. The log standard
    deviations do not depend on the input: they are a standalone variable
    initialised to ``self.cfg.init_logstdev``.

    NOTE(review): the indentation of this block was lost, so the extent of
    the two ``with`` scopes is reconstructed -- confirm that
    'logstdevs_1_Da' is meant to be created outside the 'out' scope.

    Args:
        obsfeat_B_Df: Observation features fed to the net; presumably shaped
            (batch, obsfeat dim) as the name suffix suggests -- TODO confirm.

    Returns:
        The means and the standard deviations concatenated along axis 1.
    """
    action_dim = self.action_space.dim

    # Mean of the Gaussian: MLP trunk followed by an affine read-out.
    with nn.variable_scope('hidden'):
        trunk = nn.FeedforwardNet(
            obsfeat_B_Df,
            (self.obsfeat_space.dim,),
            self.cfg.hidden_spec,
        )
    with nn.variable_scope('out'):
        zero_init = np.zeros((trunk.output_shape[0], action_dim))
        readout = nn.AffineLayer(
            trunk.output,
            trunk.output_shape,
            (action_dim,),
            initializer=zero_init,
        )
        assert readout.output_shape == (action_dim,)
    mu_B_Da = readout.output

    # The log standard deviations are free parameters, one per action
    # dimension, stored as a single row that is broadcastable along axis 0.
    logsigma_1_Da = nn.get_variable(
        'logstdevs_1_Da',
        np.full((1, action_dim), self.cfg.init_logstdev),
        broadcastable=(True, False),
    )

    # A floor on the stdev seems to make density / KL computations more stable.
    sigma_1_Da = self.cfg.min_stdev + tensor.exp(logsigma_1_Da)

    # Expand the single row to the same shape as the means, i.e. (B, Da).
    sigma_B_Da = tensor.ones_like(mu_B_Da) * sigma_1_Da

    return tensor.concatenate([mu_B_Da, sigma_B_Da], axis=1)
评论列表
文章目录