ddpg.py source code

python

Project: rl_algorithms    Author: DanielTakeshi
# This snippet assumes TensorFlow 1.x with the contrib layers API:
import tensorflow as tf
import tensorflow.contrib.layers as layers


def _build_net(self, input_BO, acts_BO, scope):
        """ The critic network.

        Use ReLUs for all hidden layers. The output consists of one Q-value for
        each batch. Set `reuse=False`. I don't use batch normalization or their
        precise weight initialization.

        Unlike the critic, it uses actions here but they are NOT included in the
        first hidden layer. In addition, we do a tf.reshape to get an output of
        shape (B,), not (B,1). Seems like tf.squeeze doesn't work with `?`.
        """
        with tf.variable_scope(scope, reuse=False):
            hidden1 = layers.fully_connected(input_BO,
                    num_outputs=400,
                    weights_initializer=layers.xavier_initializer(),
                    activation_fn=tf.nn.relu)
            # Concatenate the actions with the first hidden layer's output,
            # so the actions enter the network at the second hidden layer.
            state_action = tf.concat(axis=1, values=[hidden1, acts_BO])
            hidden2 = layers.fully_connected(state_action,
                    num_outputs=300,
                    weights_initializer=layers.xavier_initializer(),
                    activation_fn=tf.nn.relu)
            # Linear output layer: one Q-value per (state, action) pair.
            qvals_B = layers.fully_connected(hidden2,
                    num_outputs=1,
                    weights_initializer=layers.xavier_initializer(),
                    activation_fn=None)
            return tf.reshape(qvals_B, shape=[-1])
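
The reason for returning shape (B,) rather than (B,1) is that these Q-values are later combined elementwise with per-sample rewards and done flags when forming the critic's Bellman targets. Below is a minimal, hedged sketch of that step in TF 1.x; the names (gamma, rewards_B, done_B) and the placeholder stand-ins for the two _build_net outputs are illustrative assumptions, not taken from the original ddpg.py.

# Hedged sketch: how (B,)-shaped Q-values typically enter the critic loss in
# DDPG. All names here are illustrative; in the real code qvals_B and
# target_qvals_B would come from _build_net under two different scopes
# (the live critic and the target critic), not from placeholders.
import tensorflow as tf

gamma     = 0.99
rewards_B = tf.placeholder(tf.float32, shape=[None], name='rewards')
done_B    = tf.placeholder(tf.float32, shape=[None], name='done_mask')
qvals_B        = tf.placeholder(tf.float32, shape=[None], name='qvals')
target_qvals_B = tf.placeholder(tf.float32, shape=[None], name='target_qvals')

# Bellman targets and mean-squared TD error. Every tensor here is shape (B,),
# which is exactly why the method reshapes its output from (B,1) to (B,):
# broadcasting a (B,1) tensor against a (B,) tensor would silently produce a
# (B,B) result instead of a per-sample error.
targets_B   = rewards_B + gamma * (1.0 - done_B) * tf.stop_gradient(target_qvals_B)
critic_loss = tf.reduce_mean(tf.square(qvals_B - targets_B))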