def _build_net(self, S, scope, trainable):
    """Build the actor network mapping a state to a bounded action.

    Architecture: S -> FC(30) -> relu -> FC(a_dim) -> tanh -> scale by
    ``self.action_bound``.

    Args:
        S: state input tensor; assumed shape (batch, self.state_dim) —
            TODO confirm against caller.
        scope: variable-scope name under which all variables are created
            (e.g. separate 'eval' / 'target' networks).
        trainable: whether the created variables are trainable (target
            networks are typically built with trainable=False).

    Returns:
        Tensor of shape (batch, self.a_dim) with actions scaled into
        [-action_bound, action_bound] by the final tanh * action_bound.
    """
    with tf.variable_scope(scope):
        l1_dim = 30  # hidden-layer width

        # Layer 1: fully connected, relu activation.
        w1 = tf.Variable(
            tf.truncated_normal([self.state_dim, l1_dim],
                                mean=0, stddev=0.3, seed=1234),
            trainable=trainable)
        b1 = tf.Variable(tf.constant(0.1, shape=[l1_dim]),
                         trainable=trainable)
        l1 = tf.add(tf.matmul(S, w1), b1)
        net = tf.nn.relu(l1)

        # Layer 2: fully connected, tanh activation, then scale to the
        # action bound.
        with tf.variable_scope('a'):
            w2 = tf.Variable(
                tf.truncated_normal([l1_dim, self.a_dim],
                                    mean=0, stddev=0.3, seed=1234),
                trainable=trainable)
            b2 = tf.Variable(tf.constant(0.1, shape=[self.a_dim]),
                             trainable=trainable)
            # BUG FIX: feed the relu output `net` into the second layer.
            # The original multiplied by the pre-activation `l1`, which
            # silently dropped the relu nonlinearity (making layer 1
            # purely linear) despite computing it.
            a = tf.tanh(tf.add(tf.matmul(net, w2), b2))
            scaled_a = tf.multiply(a, self.action_bound)
    return scaled_a
#add grad to tensorflow graph
#input:
# a_grads: dq/da from critic
# (removed non-code page-scrape residue: "评论列表" = "Comment list",
#  "文章目录" = "Table of contents" — these bare text lines were a syntax error)