def _make_network(self, data_in, out_dim):
""" Build the network with the same architecture following OpenAI's paper.
Returns the final *layer* of the network, which corresponds to our
chosen action. There is no non-linearity for the last layer because
different envs have different action ranges.
"""
with tf.variable_scope("ESAgent", reuse=False):
out = data_in
out = layers.fully_connected(out, num_outputs=64,
weights_initializer = layers.xavier_initializer(uniform=True),
#weights_initializer = utils.normc_initializer(0.5),
activation_fn = tf.nn.tanh)
out = layers.fully_connected(out, num_outputs=64,
weights_initializer = layers.xavier_initializer(uniform=True),
#weights_initializer = utils.normc_initializer(0.5),
activation_fn = tf.nn.tanh)
out = layers.fully_connected(out, num_outputs=out_dim,
weights_initializer = layers.xavier_initializer(uniform=True),
#weights_initializer = utils.normc_initializer(0.5),
activation_fn = None)
return out
评论列表
文章目录