def _build_net(self):
    """Build the shared convolutional trunk and the actor/critic heads.

    The input tensor is read from ``self.s``
    (NOTE(review): presumably a batch of image-like observations -- confirm
    against where ``self.s`` is defined).

    Returns:
        A tuple ``(a_prob, v)``:
        * ``a_prob`` -- softmax output with ``N_A`` units (one per action,
          assuming ``N_A`` is the action count -- TODO confirm).
        * ``v`` -- single-unit, linear (unbounded) output of the critic head.
    """
    # Shared feature extractor: three ReLU convolutions, then flatten so the
    # result can feed the fully connected heads.
    with tf.variable_scope("conv"):
        out = layers.convolution2d(
            self.s, num_outputs=32, kernel_size=8, stride=4,
            activation_fn=tf.nn.relu)
        out = layers.convolution2d(
            out, num_outputs=64, kernel_size=4, stride=2,
            activation_fn=tf.nn.relu)
        out = layers.convolution2d(
            out, num_outputs=64, kernel_size=3, stride=1,
            activation_fn=tf.nn.relu)
        out = layers.flatten(out)

    # Actor head: hidden layer followed by a softmax over N_A outputs.
    with tf.variable_scope("actor"):
        a_prob = layers.fully_connected(
            out, num_outputs=512, activation_fn=tf.nn.relu)
        a_prob = layers.fully_connected(
            a_prob, num_outputs=N_A, activation_fn=tf.nn.softmax)

    # Critic head: hidden layer followed by one linear unit.
    with tf.variable_scope("critic"):
        v = layers.fully_connected(
            out, num_outputs=512, activation_fn=tf.nn.relu)
        # The output must be linear: a softmax over a single logit is always
        # exactly 1.0, which would make the value constant and block all
        # gradient flow into the critic. ``activation_fn=None`` has to be
        # passed explicitly because the layer's default activation is ReLU.
        v = layers.fully_connected(v, num_outputs=1, activation_fn=None)

    return a_prob, v
评论列表
文章目录