def forward(image, num_actions):
    """Build a shared-trunk actor-critic network and return both heads.

    The trunk is two ReLU convolutions (16 outputs, kernel 8, stride 4;
    then 32 outputs, kernel 4, stride 2), a flatten, and a 256-unit ReLU
    fully connected layer. Two linear heads branch off the trunk: the
    actor head, passed through ``tf.nn.log_softmax``, and the critic
    head, reshaped to a 1-D tensor.

    Args:
        image: Input tensor fed to the first convolution.
            NOTE(review): presumably a batch of image frames laid out as
            (batch, height, width, channels) -- confirm against caller.
        num_actions: Number of outputs of the actor head.

    Returns:
        A tuple ``(action_logprobs, value)`` where ``action_logprobs`` is
        the log-softmax of the actor head output and ``value`` is the
        critic head output flattened with ``tf.reshape(..., [-1])``.
    """
    # NOTE(review): `layers` and `tf` come from outside this block;
    # `layers` looks like tf.contrib.layers -- confirm at the import site.

    # Convolutional feature extractor.
    conv1 = layers.convolution2d(
        image,
        num_outputs=16,
        kernel_size=8,
        stride=4,
        activation_fn=tf.nn.relu,
        scope='conv1',
    )
    conv2 = layers.convolution2d(
        conv1,
        num_outputs=32,
        kernel_size=4,
        stride=2,
        activation_fn=tf.nn.relu,
        scope='conv2',
    )

    # Shared dense representation used by both heads.
    flat = layers.flatten(conv2, scope='flatten')
    hidden = layers.fully_connected(
        flat,
        num_outputs=256,
        activation_fn=tf.nn.relu,
        scope='fc1',
    )

    # Actor head: linear layer followed by log-softmax. Built before the
    # critic head so layers are created in the same order as before.
    actor_out = layers.fully_connected(
        hidden,
        num_outputs=num_actions,
        activation_fn=None,
        scope='fc_actor',
    )
    action_logprobs = tf.nn.log_softmax(actor_out)

    # Critic head: single linear output, flattened to 1-D.
    critic_out = layers.fully_connected(
        hidden,
        num_outputs=1,
        activation_fn=None,
        scope='fc_critic',
    )
    value = tf.reshape(critic_out, [-1])

    return action_logprobs, value
# Comment list
# Article table of contents