def _build_net(self, S, scope, trainable):
    """Build the actor network mapping a state to a bounded action.

    Architecture: S -> FC(30) -> relu -> FC(a_dim) -> tanh -> scale by
    ``self.action_bound``.

    Args:
        S: state input tensor; assumed shape (batch, self.state_dim) —
            TODO confirm against caller.
        scope: variable-scope name under which all variables are created
            (e.g. separate 'eval' / 'target' networks).
        trainable: whether the created variables are trainable (target
            networks are typically built with trainable=False).

    Returns:
        Tensor of shape (batch, self.a_dim) with actions scaled into
        [-action_bound, action_bound] by the final tanh * action_bound.
    """
    with tf.variable_scope(scope):
        l1_dim = 30  # hidden-layer width

        # Layer 1: fully connected, relu activation.
        w1 = tf.Variable(
            tf.truncated_normal([self.state_dim, l1_dim],
                                mean=0, stddev=0.3, seed=1234),
            trainable=trainable)
        b1 = tf.Variable(tf.constant(0.1, shape=[l1_dim]),
                         trainable=trainable)
        l1 = tf.add(tf.matmul(S, w1), b1)
        net = tf.nn.relu(l1)

        # Layer 2: fully connected, tanh activation, then scale to the
        # action bound.
        with tf.variable_scope('a'):
            w2 = tf.Variable(
                tf.truncated_normal([l1_dim, self.a_dim],
                                    mean=0, stddev=0.3, seed=1234),
                trainable=trainable)
            b2 = tf.Variable(tf.constant(0.1, shape=[self.a_dim]),
                             trainable=trainable)
            # BUG FIX: feed the relu output `net` into the second layer.
            # The original multiplied by the pre-activation `l1`, which
            # silently dropped the relu nonlinearity (making layer 1
            # purely linear) despite computing it.
            a = tf.tanh(tf.add(tf.matmul(net, w2), b2))
            scaled_a = tf.multiply(a, self.action_bound)
    return scaled_a
#add grad to tensorflow graph
#input:
# a_grads: dq/da from critic
# (removed non-code page-scrape residue: "评论列表" = "Comment list",
#  "文章目录" = "Table of contents" — these bare text lines were a syntax error)