def _make_actiondist_ops(self, obs_B_Df):
    """Build the ops that produce the action-distribution parameters.

    The observations are flattened, fed through the feed-forward net
    described by ``self.hidden_spec``, and mapped by an affine layer with
    zero-initialised weights to the means. The standard deviations are not
    computed from the observations: they come from a single
    ``(1, self.action_space.shape[0])`` variable of log-stdevs initialised
    to ``self.init_logstdev``, and are expanded to the shape of the means.

    NOTE(review): the indentation of this block was lost in the text it was
    recovered from. The layout used here (three sibling variable scopes,
    with the log-stdev variable created inside ``'out'``) is an assumption;
    confirm it against the variable names stored in existing checkpoints.

    Args:
        obs_B_Df: observation tensor; presumably (batch, obs_features)
            judging by the name suffix -- TODO confirm.

    Returns:
        The means and the stdevs concatenated along dimension 1.
    """
    with tf.variable_scope('flat'):
        flatten = nn.FlattenLayer(obs_B_Df)

    with tf.variable_scope('hidden'):
        hidden = nn.FeedforwardNet(
            flatten.output, flatten.output_shape, self.hidden_spec)

    with tf.variable_scope('out'):
        mean_affine = nn.AffineLayer(
            hidden.output,
            hidden.output_shape,
            self.action_space.shape,
            Winitializer=tf.zeros_initializer,
            binitializer=None,
        )
        mu_B_Da = mean_affine.output

        # State-independent log-stdev parameters: a single row that is
        # shared by every row of the means.
        logsigma_shape = (1, self.action_space.shape[0])
        logsigma_1_Da = tf.get_variable(
            'logstdevs_1_Da',
            shape=logsigma_shape,
            initializer=tf.constant_initializer(self.init_logstdev),
        )

        # Adding min_stdev keeps the stdev bounded away from zero, which is
        # required for stability of the KL computations.
        exp_logsigma_1_Da = tf.exp(logsigma_1_Da)
        sigma_1_Da = self.min_stdev + exp_logsigma_1_Da

        # Multiply by ones shaped like the means to expand the single row.
        sigma_B_Da = tf.ones_like(mu_B_Da) * sigma_1_Da

        # NOTE(review): dimension-first argument order, as in pre-1.0
        # TensorFlow -- confirm the pinned TF version before upgrading.
        return tf.concat(1, [mu_B_Da, sigma_B_Da])
评论列表
文章目录