def _build_net(self, input_BO, scope):
    """ The Actor network.

    Uses ReLUs for all hidden layers, but a tanh to the output to bound the
    action. This follows their 'low-dimensional networks' using 400 and 300
    units for the hidden layers. Set `reuse=False`. I don't use batch
    normalization or their precise weight initialization.
    """
    with tf.variable_scope(scope, reuse=False):
        hidden1 = layers.fully_connected(input_BO,
                num_outputs=400,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        hidden2 = layers.fully_connected(hidden1,
                num_outputs=300,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        actions_BA = layers.fully_connected(hidden2,
                num_outputs=self.ac_dim,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.tanh) # Note the tanh!
        # This should broadcast, but haven't tested with ac_dim > 1.
        actions_BA = tf.multiply(actions_BA, self.ac_high)
        return actions_BA
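The in-code comment above is unsure whether the `ac_high` scaling broadcasts when `ac_dim > 1`. Since `tf.multiply` follows NumPy-style broadcasting, a quick NumPy check (with made-up shapes and bounds, purely for illustration) suggests the per-dimension scaling behaves as intended:

import numpy as np

# Hypothetical example: batch of 2 actions, ac_dim = 3.
actions_BA = np.array([[ 0.5, -1.0,  0.2],
                       [-0.3,  0.9, -0.8]])   # tanh outputs, each in [-1, 1]
ac_high    = np.array([2.0, 1.0, 0.5])        # per-dimension action bounds

# Element-wise multiply broadcasts ac_high across the batch (B) dimension,
# scaling component i of every action into [-ac_high[i], ac_high[i]].
print(actions_BA * ac_high)
# [[ 1.  -1.    0.1]
#  [-0.6  0.9  -0.4]]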