def policy_network(state, theta, name='policy'):
    """Build the policy graph: two ReLU hidden layers and a tanh output.

    Args:
        state: Input tensor for the first layer. It is passed directly to
            ``tf.matmul``, so it is presumably a 2-D (batch, state_dim)
            tensor -- TODO confirm against the caller.
        theta: Indexable collection of at least six parameters, read as
            ``theta[0]``/``theta[1]`` (weights/bias of the first hidden
            layer), ``theta[2]``/``theta[3]`` (second hidden layer) and
            ``theta[4]``/``theta[5]`` (output layer). Shapes must be
            compatible with the matmul/add chain below.
        name: Used both as the scope name and as the default scope name
            for the ops created here.

    Returns:
        The ``tanh`` of the output layer's pre-activation, so every
        component lies in (-1, 1).
    """
    # NOTE(review): tf.variable_op_scope is a deprecated TensorFlow API; the
    # documented replacement is
    # tf.variable_scope(name, default_name=name, values=[state]).
    # Kept as-is here because the TensorFlow version this file targets is not
    # visible -- confirm before switching.
    with tf.variable_op_scope([state], name, name):
        # Identity op only gives the input a stable name inside the scope.
        h0 = tf.identity(state, name='h0-state')
        h1 = tf.nn.relu(tf.matmul(h0, theta[0]) + theta[1], name='h1')
        h2 = tf.nn.relu(tf.matmul(h1, theta[2]) + theta[3], name='h2')
        # Linear output layer; the squashing is applied as a separate op.
        h3 = tf.identity(tf.matmul(h2, theta[4]) + theta[5], name='h3')
        action = tf.nn.tanh(h3, name='h4-action')
        return action
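# Page navigation text left over from the web source, not code: "评论列表" (comment list)
# Page navigation text left over from the web source, not code: "文章目录" (table of contents)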