def policy_model(data_in, action_dim):
    """Build the feed-forward network used as the behavior-cloning (BC) policy.

    The network is meant to be fit with ordinary supervised learning. It is
    a stack of three fully connected layers created inside the variable
    scope ``"BCNetwork"`` (``reuse=False``): two hidden layers of 100 units
    with tanh activations, followed by a linear output layer with
    ``action_dim`` units. All weights use the Xavier (uniform) initializer.

    Parameters
    ----------
    data_in : Tensor
        Network input (a placeholder) whose leading dimension is the batch
        size.
    action_dim : int
        Number of action components; each one is continuous-valued (at
        least for MuJoCo).

    Returns
    -------
    Tensor
        Output of the final linear layer: the predicted action (or, at test
        time, the action the agent should take).
    """
    # (number of units, activation) for every layer, listed in build order.
    # The last entry is the linear output head.
    layer_specs = (
        (100, tf.nn.tanh),
        (100, tf.nn.tanh),
        (action_dim, None),
    )

    with tf.variable_scope("BCNetwork", reuse=False):
        net = data_in
        for width, activation in layer_specs:
            # A fresh initializer object is created for each layer, exactly
            # as when the three layers are written out by hand.
            net = layers.fully_connected(
                net,
                num_outputs=width,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=activation,
            )
    return net
# NOTE: leftover web-page navigation text ("Comment list", "Table of contents"); not part of the code.