def _build_net(self, input_BO, scope):
    """ The Actor network.

    Uses ReLUs for all hidden layers, but a tanh to the output to bound the
    action. This follows their 'low-dimensional networks' using 400 and 300
    units for the hidden layers. Set `reuse=False`. I don't use batch
    normalization or their precise weight initialization.
    """
    with tf.variable_scope(scope, reuse=False):
        hidden1 = layers.fully_connected(input_BO,
                num_outputs=400,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        hidden2 = layers.fully_connected(hidden1,
                num_outputs=300,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        actions_BA = layers.fully_connected(hidden2,
                num_outputs=self.ac_dim,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.tanh) # Note the tanh!
        # This should broadcast, but haven't tested with ac_dim > 1.
        actions_BA = tf.multiply(actions_BA, self.ac_high)
        return actions_BA
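The in-code comment above is unsure whether the `ac_high` scaling broadcasts when `ac_dim > 1`. Since `tf.multiply` follows NumPy-style broadcasting, a quick NumPy check (with made-up shapes and bounds, purely for illustration) suggests the per-dimension scaling behaves as intended:

import numpy as np

# Hypothetical example: batch of 2 actions, ac_dim = 3.
actions_BA = np.array([[ 0.5, -1.0,  0.2],
                       [-0.3,  0.9, -0.8]])   # tanh outputs, each in [-1, 1]
ac_high    = np.array([2.0, 1.0, 0.5])        # per-dimension action bounds

# Element-wise multiply broadcasts ac_high across the batch (B) dimension,
# scaling component i of every action into [-ac_high[i], ac_high[i]].
print(actions_BA * ac_high)
# [[ 1.  -1.    0.1]
#  [-0.6  0.9  -0.4]]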