reinforce_with_baseline_pixels-skeleton.py 文件源码-python代码片段

reinforce_with_baseline_pixels-skeleton.py 文件源码

python

阅读 15 收藏 0 点赞 0 评论 0

def policy_and_value_network(observations):
    """Build the policy logits and the baseline value from shared features.

    TODO: Baseline network, used in (Mnih et al., 2016).

    The observations go through two convolutions (16 outputs, kernel 8,
    stride 4; then 32 outputs, kernel 4, stride 2), are flattened, and feed a
    128-unit fully connected layer with ReLU activation. Two linear heads are
    then built on top of that shared layer.

    Args:
        observations: Input tensor passed to the first convolution
            (presumably a batch of image observations -- confirm with caller).

    Returns:
        A ``(logits, value)`` tuple. ``logits`` comes from a linear layer with
        ``env.actions`` outputs; ``value`` comes from a linear layer with a
        single output.
    """
    # Convolutional feature extractor.
    features = tf_layers.convolution2d(observations, 16, 8, stride=4)
    features = tf_layers.convolution2d(features, 32, 4, stride=2)
    flat_features = tf_layers.flatten(features)

    # Hidden layer shared by both heads.
    shared = tf_layers.fully_connected(
        flat_features, 128, activation_fn=tf.nn.relu)

    # Policy head, then baseline head (creation order kept as-is).
    policy_logits = tf_layers.linear(shared, env.actions)
    state_value = tf_layers.linear(shared, 1)

    # TODO: If you do not want to use baseline, override the value head with
    # zeros by uncommenting the next line.
    # state_value = tf.zeros([tf.shape(observations)[0], 1])
    return policy_logits, state_value