def policy_and_value_network(observations):
    """Build the shared network and return its policy and value outputs.

    The graph applies two `tf_layers.convolution2d` calls to `observations`,
    flattens the result, and feeds it through one 128-unit ReLU
    fully connected layer. Two linear heads are then attached to that
    shared hidden layer: one with `env.actions` outputs and one with a
    single output.

    NOTE(review): `tf_layers` is presumably `tensorflow.contrib.layers`, in
    which case the positional convolution arguments are
    (num_outputs, kernel_size, stride) -- confirm against the file's imports.
    `env` is read from the enclosing scope.

    Args:
        observations: Input passed to the first convolution; its leading
            dimension is presumably the batch (TODO confirm).

    Returns:
        A `(logits, value)` pair: the output of the `env.actions`-wide linear
        head and the output of the one-unit linear head.
    """
    # TODO: Baseline network, used in (Mnih et al., 2016)
    features = tf_layers.convolution2d(observations, 16, 8, 4)
    features = tf_layers.convolution2d(features, 32, 4, 2)
    flat_features = tf_layers.flatten(features)
    shared_hidden = tf_layers.fully_connected(
        flat_features, 128, activation_fn=tf.nn.relu
    )

    # Both heads read from the same hidden representation.
    policy_logits = tf_layers.linear(shared_hidden, env.actions)
    baseline_value = tf_layers.linear(shared_hidden, 1)

    # TODO: If you do not want to use baseline, replace `baseline_value` with
    #   tf.zeros([tf.shape(observations)[0], 1])
    return policy_logits, baseline_value
reinforce_with_baseline_pixels-skeleton.py 文件源码
python
阅读 15
收藏 0
点赞 0
评论 0
评论列表
文章目录