def __init__(self, namespace, actor):
    super(CriticNetwork, self).__init__(namespace)
    # input state to the critic is the _same_ state given to the actor.
    # input action to the critic is simply the output action of the actor.
    # even though, when training, we explicitly feed a new value for the
    # input action (via the input_action tensor), we still need to stop the
    # gradient flowing back into the actor, since there is also a path through
    # the actor to input_state; we have to be explicit about cutting it here
    # (otherwise training the critic will attempt to train the actor too).
    self.input_state = actor.input_state
    self.input_action = tf.stop_gradient(actor.output_action)
    with tf.variable_scope(namespace):
        if opts.use_raw_pixels:
            conv_net = self.simple_conv_net_on(self.input_state, opts)
            # TODO: use base_network helper
            hidden1 = slim.fully_connected(conv_net, 200, scope='hidden1')
            hidden2 = slim.fully_connected(hidden1, 50, scope='hidden2')
            concat_inputs = tf.concat(1, [hidden2, self.input_action])
            final_hidden = slim.fully_connected(concat_inputs, 50, scope="hidden3")
        else:
            # stack of hidden layers on flattened input; (batch,2,2,7) -> (batch,28)
            flat_input_state = slim.flatten(self.input_state, scope='flat')
            concat_inputs = tf.concat(1, [flat_input_state, self.input_action])
            final_hidden = self.hidden_layers_starting_at(concat_inputs,
                                                          opts.critic_hidden_layers)
        # output from critic is a single q-value
        self.q_value = slim.fully_connected(scope='q_value',
                                            inputs=final_hidden,
                                            num_outputs=1,
                                            weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                                            activation_fn=None)
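
# NOTE: a minimal sketch, not part of the original code, of how this q_value is
# typically trained in DDPG: regress onto a one-step bellman target computed
# from a separate target critic network built over the next-state inputs. the
# names `target_critic`, `opts.discount` and `opts.critic_learning_rate` are
# illustrative assumptions, not names taken from the source.
def setup_bellman_loss_sketch(self, target_critic):
    self.reward = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    # 1.0 for non-terminal transitions, 0.0 for terminal ones, so terminal
    # states bootstrap from the immediate reward only.
    self.terminal_mask = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    # bellman target r + gamma * Q'(s2, a2); stop_gradient so minimising the
    # loss updates only this critic's weights, not the target network's.
    target_q = tf.stop_gradient(self.reward +
                                opts.discount * self.terminal_mask * target_critic.q_value)
    self.temporal_difference_loss = tf.reduce_mean(tf.square(target_q - self.q_value))
    self.train_op = tf.train.AdamOptimizer(opts.critic_learning_rate).minimize(
        self.temporal_difference_loss)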