networks.py 文件源码

python
阅读 32 收藏 0 点赞 0 评论 0

项目:a3c_torcs 作者: waxz 项目源码 文件源码
def _create_train(self):
        """Build the loss, gradient and update ops for this network.

        Creates three placeholders (``actions``, ``target_v``,
        ``advantages``), derives the policy and value losses from them,
        computes the gradients of the combined loss with respect to the
        trainable variables collected under ``self.scope``, clips them by
        global norm, and builds an op that applies the clipped gradients to
        the trainable variables collected under the ``'global'`` scope.

        Reads ``self.scope``, ``self.action_size``, ``self.normal_dist``,
        ``self.value`` and ``self.trainer``; these are assumed to have been
        set before this method runs (not visible in this block).

        Sets ``self.actions``, ``self.target_v``, ``self.advantages``,
        ``self.policy_loss``, ``self.value_loss``, ``self.loss``,
        ``self.gradients``, ``self.var_norms``, ``self.grad_norms`` and
        ``self.apply_grads``.
        """
        with tf.variable_scope(self.scope):
            # Training inputs fed at update time.
            self.actions = tf.placeholder(
                tf.float32, [None, self.action_size], 'actions')
            self.target_v = tf.placeholder(tf.float32, [None], 'target_v')
            self.advantages = tf.placeholder(
                tf.float32, [None], 'advantages')

            # Policy loss: log-probability of the fed actions weighted by
            # the advantages. The transpose / multiply / transpose sequence
            # presumably lets the 1-D advantages broadcast over the leading
            # axis of log_prob -- TODO confirm log_prob's shape.
            action_log_prob = self.normal_dist.log_prob(self.actions)
            weighted_t = tf.multiply(
                tf.transpose(action_log_prob), self.advantages)
            weighted_log_prob = tf.transpose(weighted_t)
            # Entropy term scaled by 0.01 is added to the objective before
            # it is negated and summed into the loss.
            entropy_bonus = 0.01 * self.normal_dist.entropy()
            objective = entropy_bonus + weighted_log_prob
            self.policy_loss = tf.reduce_sum(-objective)

            # Value loss: half the summed squared error between the target
            # values and the flattened value output.
            flat_value = tf.reshape(self.value, [-1])
            value_error = self.target_v - flat_value
            self.value_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

            self.loss = 0.5 * self.value_loss + self.policy_loss

            # Differentiate with respect to this scope's own variables.
            scope_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
            self.gradients = tf.gradients(self.loss, scope_vars)
            self.var_norms = tf.global_norm(scope_vars)

            # Clip to a global norm of 40.0; the second return value is the
            # global norm of the gradients before clipping.
            clipped_grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # The clipped gradients are paired positionally with the
            # variables under the 'global' scope and applied there.
            shared_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            grads_and_vars = zip(clipped_grads, shared_vars)
            self.apply_grads = self.trainer.apply_gradients(grads_and_vars)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号