# bc.py source file - Python code snippet

def policy_model(data_in, action_dim):
    """ Build the feed-forward network that serves as the BC policy, to be
    fit with ordinary supervised learning.

    The network is two hidden fully connected layers of 100 units each with
    tanh activations, followed by a linear (no activation) fully connected
    output layer with `action_dim` units. All weights use a uniform Xavier
    initializer, and every layer is created inside the "BCNetwork" variable
    scope, which is opened with reuse=False.

    Parameters
    ----------
    data_in: [Tensor]
        The network input (a placeholder); its leading dimension is the
        batch size.
    action_dim: [int]
        Number of action components to predict; for MuJoCo these are
        continuous-valued.

    Returns
    -------
    out [Tensor]
        The output of the final linear layer, i.e. the predicted action for
        the agent (one row per input in the batch).
    """
    # (number of units, activation) for each layer, listed in build order.
    layer_specs = (
        (100, tf.nn.tanh),
        (100, tf.nn.tanh),
        (action_dim, None),  # linear output layer
    )

    with tf.variable_scope("BCNetwork", reuse=False):
        net = data_in
        for width, activation in layer_specs:
            # A fresh initializer object is created for every layer.
            net = layers.fully_connected(
                net,
                num_outputs=width,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=activation,
            )
    return net