def _build_vr_network(self):
    """Build the "vr" value head and its scaled squared-error loss.

    Creates two placeholders, feeds the states through
    ``self.build_shared_network`` and the ``fc1`` / ``logits_value``
    fully-connected layers (all requested with ``reuse=True``, i.e. the
    variables are expected to exist already), and stores on ``self``:

    * ``vr_states``        -- float32 placeholder, shape [None, 80, 80, 4].
    * ``vr_value_targets`` -- float32 placeholder, shape [None].
    * ``vr_value``         -- head output with dimension 1 squeezed away.
    * ``vr_losses``        -- element-wise squared difference to the targets.
    * ``vr_loss``          -- sum of ``vr_losses`` times ``self.pc_vr_lambda``.

    NOTE(review): "vr" presumably stands for "value replay" -- confirm.
    """
    # Placeholders are created in the same order as before so that any
    # auto-generated op names stay unchanged.
    self.vr_states = tf.placeholder(dtype=tf.float32, shape=[None, 80, 80, 4])
    self.vr_value_targets = tf.placeholder(dtype=tf.float32, shape=[None])

    # NOTE(review): the pasted source had lost its indentation, so the extent
    # of this ``with`` block is an assumption (trunk + fc1 inside, value head
    # outside). Verify against the scope in which ``logits_value`` was
    # originally created, otherwise ``reuse=True`` will not find it.
    with tf.variable_scope("shared", reuse=True):
        conv_features = self.build_shared_network(self.vr_states)
        flat_features = tf.contrib.layers.flatten(conv_features)
        hidden = tf.contrib.layers.fully_connected(
            inputs=flat_features,
            num_outputs=256,
            scope="fc1",
            reuse=True)

    # Linear head (no activation) producing one value per sample.
    value_logits = tf.contrib.layers.fully_connected(
        inputs=hidden,
        num_outputs=1,
        activation_fn=None,
        scope='logits_value',
        reuse=True)
    self.vr_value = tf.squeeze(value_logits, squeeze_dims=[1])

    # Per-sample squared error, summed over the batch and weighted.
    self.vr_losses = tf.squared_difference(self.vr_value, self.vr_value_targets)
    summed_error = tf.reduce_sum(self.vr_losses)
    self.vr_loss = self.pc_vr_lambda * summed_error
# [web-page residue, not code] Comment list
# [web-page residue, not code] Table of contents