def policy(obs, theta, name='policy'):
    """Build the policy network ops that map an observation to an action.

    The network is two ReLU hidden layers followed by a linear layer whose
    output is squashed with tanh, so every component of the returned action
    lies in (-1, 1).

    Args:
        obs: Observation tensor; it is matrix-multiplied with ``theta[0]``.
        theta: Indexable collection of six parameters, read as
            ``theta[0]``/``theta[1]`` (weights/bias of the first hidden
            layer), ``theta[2]``/``theta[3]`` (second hidden layer) and
            ``theta[4]``/``theta[5]`` (output layer).
        name: Used as both the scope name and the default scope name for
            the ops created here.

    Returns:
        The tanh-squashed output tensor (op name ``'h4-action'``).
    """
    # NOTE(review): tf.variable_op_scope was deprecated in later TensorFlow
    # releases in favour of tf.variable_scope(name, default_name, values);
    # confirm the TF version this file targets before migrating.
    with tf.variable_op_scope([obs], name, name):
        # Identity op only gives the input a stable name inside the scope.
        h0 = tf.identity(obs, name='h0-obs')
        h1 = tf.nn.relu(tf.matmul(h0, theta[0]) + theta[1], name='h1')
        h2 = tf.nn.relu(tf.matmul(h1, theta[2]) + theta[3], name='h2')
        # Output layer is linear; the non-linearity is applied separately
        # below so the pre-activation is available as the named op 'h3'.
        h3 = tf.identity(tf.matmul(h2, theta[4]) + theta[5], name='h3')
        action = tf.nn.tanh(h3, name='h4-action')
        return action
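# Web-page navigation residue, not code: "评论列表" (comment list)
# Web-page navigation residue, not code: "文章目录" (article contents)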