def _build_net(self, input_BO, acts_BO, scope):
""" The critic network.
Use ReLUs for all hidden layers. The output consists of one Q-value for
each batch. Set `reuse=False`. I don't use batch normalization or their
precise weight initialization.
Unlike the critic, it uses actions here but they are NOT included in the
first hidden layer. In addition, we do a tf.reshape to get an output of
shape (B,), not (B,1). Seems like tf.squeeze doesn't work with `?`.
"""
    with tf.variable_scope(scope, reuse=False):
        hidden1 = layers.fully_connected(input_BO,
                num_outputs=400,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        # Concatenate the actions with the first hidden layer's output,
        # so actions enter the network at the second hidden layer.
        state_action = tf.concat(axis=1, values=[hidden1, acts_BO])
        hidden2 = layers.fully_connected(state_action,
                num_outputs=300,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        qvals_B = layers.fully_connected(hidden2,
                num_outputs=1,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=None)
        # Flatten (B,1) -> (B,) so the output is one Q-value per sample.
        return tf.reshape(qvals_B, shape=[-1])
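For context, a minimal usage sketch. This assumes TensorFlow 1.x with tf.contrib.layers; the observation/action dimensions and the scope name 'critic' are hypothetical, chosen only for illustration. In the source this is presumably a method of a critic class; here the function is called directly with None for `self`, which works because `self` is unused in the body.

import tensorflow as tf
import tensorflow.contrib.layers as layers

# Hypothetical dimensions; the batch size B is left as None (`?`).
obs_dim, act_dim = 17, 6
obs_BO  = tf.placeholder(tf.float32, shape=[None, obs_dim])  # (B, O) observations
acts_BO = tf.placeholder(tf.float32, shape=[None, act_dim])  # (B, A) actions

# Build the critic graph; inside a class this would be self._build_net(...).
qvals_B = _build_net(None, obs_BO, acts_BO, scope='critic')
print(qvals_B.shape)  # prints (?,), i.e. shape (B,) as the docstring says

Feeding the actions in at the second hidden layer rather than the first follows the architecture described in the DDPG paper, where only the state passes through the first layer.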