Python global_norm() example source code

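The snippets below are collected from the open-source projects listed with each file and show how tf.global_norm() is typically used: it computes the global L2 norm of a list of tensors (the square root of the sum of their squared norms), most often to monitor or clip gradients. As a minimal sketch of the pattern, assuming TensorFlow 1.x where tf.global_norm and tf.clip_by_global_norm live in the top-level namespace:

import tensorflow as tf  # assumes TensorFlow 1.x APIs

# Two toy variables and a loss.
w = tf.Variable([1.0, 2.0, 3.0], name="w")
b = tf.Variable(0.5, name="b")
loss = tf.reduce_sum(tf.square(w)) + tf.square(b)

# Gradients of the loss w.r.t. the variables.
grads = tf.gradients(loss, [w, b])

# Global norm: sqrt(sum of squared L2 norms of the gradients).
grad_norm = tf.global_norm(grads)

# Rescale the gradients so their global norm is at most 5.0;
# clip_by_global_norm also returns the pre-clipping global norm.
clipped_grads, pre_clip_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([grad_norm, tf.global_norm(clipped_grads)]))
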
model_helper.py (project: nmt, author: tensorflow)
def gradient_clip(gradients, max_gradient_norm):
  """Clipping gradients of a model."""
  clipped_gradients, gradient_norm = tf.clip_by_global_norm(
      gradients, max_gradient_norm)
  gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
  gradient_norm_summary.append(
      tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)))

  return clipped_gradients, gradient_norm_summary, gradient_norm
variables.py (project: wide-deep-cnn, author: DaniUPC)
def summarize_gradients(model_name, gradients):
    """ Adds histograms for gradients and gradient norms of the input
    gradients """

    def get_prefix(var):
        return model_name + '/' + var.name

    for gradient, variable in gradients:
        if gradient is not None:
            tf.summary.histogram(get_prefix(variable) + "/gradients", gradient)
            tf.summary.histogram(get_prefix(variable) + "/gradient_norm",
                                 tf.global_norm([gradient]))
test_gan_losses.py (project: tefla, author: openAGI)
def test_stable_global_norm_avoids_overflow(self):
        tensors = [tf.ones([4]), tf.ones([4, 4]) * 1e19, None]
        gnorm_is_inf = tf.is_inf(tf.global_norm(tensors))
        stable_gnorm_is_inf = tf.is_inf(
            tfgan_losses._numerically_stable_global_norm(tensors))

        with self.test_session(use_gpu=True):
            self.assertTrue(gnorm_is_inf.eval())
            self.assertFalse(stable_gnorm_is_inf.eval())
test_gan_losses.py (project: tefla, author: openAGI)
def test_stable_global_norm_unchanged(self):
        """Test that preconditioning doesn't change global norm value."""
        tf.set_random_seed(1234)
        tensors = [tf.random_uniform(
            [3] * i, -10.0, 10.0) for i in range(6)]
        gnorm = tf.global_norm(tensors)
        precond_gnorm = tfgan_losses._numerically_stable_global_norm(tensors)

        with self.test_session(use_gpu=True) as sess:
            # spot check closeness on more than one sample.
            for _ in range(10):
                gnorm_np, precond_gnorm_np = sess.run([gnorm, precond_gnorm])
                self.assertNear(gnorm_np, precond_gnorm_np, 1e-5)
optimizer.py (project: tefla, author: openAGI)
def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
    """Clips gradients of a multitask loss by their global norm.

    Ignores all-zero tensors when computing the global norm.

    Args:
      gradients_variables: a list of pairs (gradient, variable).
      clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.

    Returns:
      list: A list of (gradient, variable) pairs of the same type as gradients_variables.
      fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
    """
    gradients, variables = six.moves.zip(*gradients_variables)

    def _replace_nonexisting_grad(grad):
        if grad is None:
            return grad
        all_zeros = _is_all_zeros(grad)
        return tf.cond(
            all_zeros,
            lambda: tf.zeros([], dtype=tf.as_dtype(grad.dtype)),
            lambda: grad)

    nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
    fixed_global_norm = tf.global_norm(nonzero_gradients)
    gradients, _ = tf.clip_by_global_norm(
        gradients, clip_norm, use_norm=fixed_global_norm)
    return list(six.moves.zip(gradients, variables)), fixed_global_norm
base.py (project: tefla, author: openAGI)
def _adaptive_gradient_clipping(self, grads_and_vars, std_factor=2., decay=0.95, static_max_norm=None, global_step=None, epsilon=1e-8, name=None):
        """function for adaptive gradient clipping."""
        grads, variables = zip(*grads_and_vars)
        norm = tf.global_norm(grads)
        max_norm, log_mean = self._adaptive_max_norm(norm, std_factor, decay,
                                                     global_step, epsilon, name)

        # factor will be 1. if norm is smaller than max_norm
        factor = tf.where(norm < max_norm,
                          tf.ones_like(norm),
                          tf.exp(log_mean) / norm)

        if static_max_norm is not None:
            factor = tf.minimum(static_max_norm / norm, factor)

        # apply factor
        clipped_grads = []
        for grad in grads:
            if grad is None:
                clipped_grads.append(None)
            elif isinstance(grad, tf.IndexedSlices):
                clipped_grads.append(tf.IndexedSlices(grad.values * factor, grad.indices,
                                                      grad.dense_shape))
            else:
                clipped_grads.append(grad * factor)

        return list(zip(clipped_grads, variables))
networks.py (project: a3c_torcs, author: waxz)
def _create_train(self):
        with tf.variable_scope(self.scope):
            self.actions = tf.placeholder(
                shape=[None, self.action_size], dtype=tf.float32,
                name='actions')
            self.target_v = tf.placeholder(
                shape=[None], dtype=tf.float32, name='target_v')
            self.advantages = tf.placeholder(
                shape=[None], dtype=tf.float32, name='advantages')

            # Determine the policy loss using the actions and the advantage
            log_prob = self.normal_dist.log_prob(self.actions)
            exp_v = tf.transpose(
                tf.multiply(tf.transpose(log_prob), self.advantages))
            entropy = self.normal_dist.entropy()
            exp_v = 0.01 * entropy + exp_v
            self.policy_loss = tf.reduce_sum(-exp_v)

            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))

            self.loss = 0.5*self.value_loss + self.policy_loss

            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)

            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)

            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = self.trainer.apply_gradients(
                zip(grads, global_vars))
model_deploy.py (project: segmentation-models, author: desimone)
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

      Note: The summaries are also added to the SUMMARIES collection.

      Args:
        grads_and_vars: A list of gradient to variable pairs (tuples).

      Returns:
        The _list_ of the added summaries for grads_and_vars.
      """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(
                tf.histogram_summary(var.op.name + ':gradient', grad_values))
            summaries.append(
                tf.histogram_summary(var.op.name + ':gradient_norm',
                                     tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
language_model.py (project: ran, author: kentonl)
def _backward(self, loss, summaries=False):
        hps = self.hps

        loss = loss * hps.num_steps

        emb_vars = find_trainable_variables("emb")
        lstm_vars = find_trainable_variables("LSTM")
        softmax_vars = find_trainable_variables("softmax")

        all_vars = emb_vars + lstm_vars + softmax_vars
        grads = tf.gradients(loss, all_vars)
        orig_grads = grads[:]
        emb_grads = grads[:len(emb_vars)]
        grads = grads[len(emb_vars):]
        for i in range(len(emb_grads)):
            assert isinstance(emb_grads[i], tf.IndexedSlices)
            emb_grads[i] = tf.IndexedSlices(emb_grads[i].values * hps.batch_size, emb_grads[i].indices,
                                            emb_grads[i].dense_shape)

        lstm_grads = grads[:len(lstm_vars)]
        softmax_grads = grads[len(lstm_vars):]

        lstm_grads, lstm_norm = tf.clip_by_global_norm(lstm_grads, hps.max_grad_norm)
        clipped_grads = emb_grads + lstm_grads + softmax_grads
        assert len(clipped_grads) == len(orig_grads)

        if summaries:
            tf.summary.scalar("model/lstm_grad_norm", lstm_norm)
            tf.summary.scalar("model/lstm_grad_scale", tf.minimum(hps.max_grad_norm / lstm_norm, 1.0))
            tf.summary.scalar("model/lstm_weight_norm", tf.global_norm(lstm_vars))
            # for v, g, cg in zip(all_vars, orig_grads, clipped_grads):
            #     name = v.name.lstrip("model/")
            #     tf.histogram_summary(name + "/var", v)
            #     tf.histogram_summary(name + "/grad", g)
            #     tf.histogram_summary(name + "/clipped_grad", cg)

        return list(zip(clipped_grads, all_vars))
model_helper.py (project: GNMT2, author: Mingyearn)
def gradient_clip(gradients, params, max_gradient_norm):
  """Clipping gradients of a model."""
  clipped_gradients, gradient_norm = tf.clip_by_global_norm(
      gradients, max_gradient_norm)
  gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
  gradient_norm_summary.append(
      tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)))

  return clipped_gradients, gradient_norm_summary
optimizers.py (project: odin, author: imito)
def get_gradients(self, loss_or_grads, params):
    """
    Note
    ----
    The returned gradients may contain None value
    """
    # check valid algorithm
    if self.algorithm is None or \
    not hasattr(self.algorithm, 'compute_gradients') or \
    not hasattr(self.algorithm, 'apply_gradients'):
      raise RuntimeError("Optimizer is None, or doesn't has attributes: "
                         "compute_gradients and apply_gradients.")
    with tf.variable_scope(self.name):
      # get the gradient
      grads_var = self.algorithm.compute_gradients(loss_or_grads,
                                                   var_list=params)
      grads_var = {g: v for g, v in grads_var if g is not None}
      grads = list(grads_var.keys())
      params = list(grads_var.values())
      # ====== clipnorm ====== #
      if self.clipnorm is not None:
        if self.clip_alg == 'norm':
          grads = [tf.clip_by_norm(g, self.clipnorm)
                   for g in grads]
        elif self.clip_alg == 'total_norm':
          grads, _ = tf.clip_by_global_norm(grads, self.clipnorm)
        elif self.clip_alg == 'avg_norm':
          grads = [tf.clip_by_average_norm(g, self.clipnorm)
                   for g in grads]
      # ====== clipvalue ====== #
      if self.clipvalue is not None:
        grads = [tf.clip_by_value(g, -self.clipvalue, self.clipvalue)
                 for g in grads]
      # ====== get final norm value ====== #
      self._norm = add_role(tf.global_norm(grads, name="GradientNorm"),
                            GradientsNorm)
      return [(g, p) for g, p in zip(grads, params)]
tfpolicy.py (project: ray, author: ray-project)
def initialize(self):
        if self.summarize:
            bs = tf.to_float(tf.shape(self.x)[0])
            tf.summary.scalar("model/policy_loss", self.pi_loss / bs)
            tf.summary.scalar("model/value_loss", self.vf_loss / bs)
            tf.summary.scalar("model/entropy", self.entropy / bs)
            tf.summary.scalar("model/grad_gnorm", tf.global_norm(self.grads))
            tf.summary.scalar("model/var_gnorm", tf.global_norm(self.var_list))
            self.summary_op = tf.summary.merge_all()

        self.sess = tf.Session(graph=self.g, config=tf.ConfigProto(
            intra_op_parallelism_threads=1, inter_op_parallelism_threads=2))
        self.variables = ray.experimental.TensorFlowVariables(self.loss,
                                                              self.sess)
        self.sess.run(tf.global_variables_initializer())
base.py (project: a3c-tensorflow, author: carpedm20)
def build_summary(self):
    bs = tf.to_float(tf.shape(self.local_network.x)[0])

    tf.summary.scalar("model/policy_loss", self.pi_loss / bs)
    tf.summary.scalar("model/value_loss", self.vf_loss / bs)
    tf.summary.scalar("model/entropy", self.entropy / bs)
    tf.summary.image("model/state", self.local_network.x)
    tf.summary.scalar("model/grad_global_norm", tf.global_norm(self.grads))
    tf.summary.scalar("model/var_global_norm", tf.global_norm(self.local_network.var_list))
    tf.summary.scalar("model/lr", self.lr)

    self.summary_op = tf.summary.merge_all()
model_base.py (project: nmt_v2, author: rpryzant)
def _make_training_op(self):
        if self.config.optimizer == 'sgd':
            self.learning_rate = tf.cond(
                self.global_step < self.config.start_decay_step,
                lambda: tf.constant(self.config.learning_rate),
                lambda: tf.train.exponential_decay(
                    self.config.learning_rate,
                    (self.global_step - self.config.start_decay_step),
                    self.config.decay_steps,
                    self.config.decay_factor,
                    staircase=True),
                name='learning_rate')
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        elif self.config.optimizer == 'adam':
            assert self.config.learning_rate < 0.007
            self.learning_rate = tf.constant(self.config.learning_rate)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)

        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, gradient_norm = tf.clip_by_global_norm(
            gradients, self.config.max_gradient_norm)

        tf.summary.scalar("grad_norm", gradient_norm)
        tf.summary.scalar("clipped_norm", tf.global_norm(clipped_gradients))
        tf.summary.scalar("learning_rate", self.learning_rate)

        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

        return train_op
q3_gru.py (project: CS224n, author: akash9182)
def add_training_op(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train. See

        TODO:
            - Get the gradients for the loss from optimizer using
              optimizer.compute_gradients.
            - if self.clip_gradients is true, clip the global norm of
              the gradients using tf.clip_by_global_norm to self.config.max_grad_norm
            - Compute the resultant global norm of the gradients using
              tf.global_norm and save this global norm in self.grad_norm.
            - Finally, actually create the training operation by calling
              optimizer.apply_gradients.
        See: https://www.tensorflow.org/api_docs/python/train/gradient_clipping
        Args:
            loss: Loss tensor.
        Returns:
            train_op: The Op for training.
        """

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr)

        ### YOUR CODE HERE (~6-10 lines)

        # - Remember to clip gradients only if self.config.clip_gradients
        # is True.
        # - Remember to set self.grad_norm

        ### END YOUR CODE

        assert self.grad_norm is not None, "grad_norm was not set properly!"
        return train_op
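For reference, here is a sketch of the steps the TODO above spells out. This is not the assignment's official solution; the helper below is hypothetical and mirrors the stub's self.config.clip_gradients, self.config.max_grad_norm, and self.grad_norm with plain arguments and return values:

import tensorflow as tf  # assumes TensorFlow 1.x APIs

def add_training_op_sketch(loss, lr, clip_gradients=True, max_grad_norm=5.0):
    # Hypothetical standalone version of the stub above.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    # 1. Get the gradients for the loss from the optimizer.
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    # 2. Optionally clip the gradients by their global norm.
    if clip_gradients:
        grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    # 3. Compute the resulting global norm (the stub stores this in self.grad_norm).
    grad_norm = tf.global_norm(grads)
    # 4. Apply the (possibly clipped) gradients.
    train_op = optimizer.apply_gradients(list(zip(grads, variables)))
    return train_op, grad_norm
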
q_network.py (project: RL-Universe, author: Bifrost-Research)
def build_network(self):

        state = tf.placeholder(tf.float32, [None, 84, 84, 4])

        cnn_1 = slim.conv2d(state, 16, [8,8], stride=4, scope=self.name + '/cnn_1', activation_fn=nn.relu)

        cnn_2 = slim.conv2d(cnn_1, 32, [4,4], stride=2, scope=self.name + '/cnn_2', activation_fn=nn.relu)

        flatten = slim.flatten(cnn_2)

        fcc_1 = slim.fully_connected(flatten, 256, scope=self.name + '/fcc_1', activation_fn=nn.relu)

        adv_probas = slim.fully_connected(fcc_1, self.nb_actions, scope=self.name + '/adv_probas', activation_fn=nn.softmax)

        value_state = slim.fully_connected(fcc_1, 1, scope=self.name + '/value_state', activation_fn=None)

        tf.summary.scalar("model/cnn1_global_norm", tf.global_norm(slim.get_variables(scope=self.name + '/cnn_1')))
        tf.summary.scalar("model/cnn2_global_norm", tf.global_norm(slim.get_variables(scope=self.name + '/cnn_2')))
        tf.summary.scalar("model/fcc1_global_norm", tf.global_norm(slim.get_variables(scope=self.name + '/fcc_1')))
        tf.summary.scalar("model/adv_probas_global_norm", tf.global_norm(slim.get_variables(scope=self.name + '/adv_probas')))
        tf.summary.scalar("model/value_state_global_norm", tf.global_norm(slim.get_variables(scope=self.name + '/value_state')))

        #Input
        self._tf_state = state

        #Output
        self._tf_adv_probas = adv_probas
        self._tf_value_state = value_state
optimization.py (project: main_loop_tf, author: fvisin)
def __add_summaries(self, grads_and_vars, grad_noise_scale,
                        dev_set_scope, summaries=[]):
        if summaries == []:
            return

        # Add summary for the noise on the gradient
        # -----------------------------------------
        if grad_noise_scale is not None:
            with tf.name_scope(dev_set_scope):
                tf.summary.scalar("NoiseGrad", grad_noise_scale,
                                  summaries)

        # Add histograms for variables, grads and grad norms
        # --------------------------------------------------
        for grad, var in grads_and_vars:
            if isinstance(grad, tf.IndexedSlices):
                grad_vals = grad.values
            else:
                grad_vals = grad

            if grad_vals is not None:
                # Remove model_name/
                var_name = var.op.name.replace(
                    self.cfg.model_name + '/', '')
                scope_str = dev_set_scope + '_%s'  # metric
                scope_str, var_name = squash_maybe(scope_str,
                                                   var_name)
                scope_str += '_%s'  # var name
                # Write the summary
                with tf.name_scope(None):
                    tf.summary.scalar(
                        scope_str % ('GradientNorm', var_name),
                        tf.global_norm([grad_vals]), summaries)
                    tf.summary.histogram(
                        scope_str % ('GradientHist', var_name),
                        grad_vals, summaries)

        # Add global norm summary
        # -----------------------
        # Remove the name_scopes (the one from the variable_scope
        # and the one from the name_scope)
        with tf.name_scope(dev_set_scope):
            name = ('clipped_grad_norm' if self.cfg.max_grad_norm else
                    'grad_norm')
            tf.summary.scalar('Global_norm/' + name,
                              tf.global_norm(list(zip(*grads_and_vars))[0]),
                              summaries)
DNGPU_model.py (project: DNGPU, author: LUMII-Syslab)
def createGraph(self):
        """Creates graph for training"""
        self.base_cost=0.0
        self.accuracy = 0
        num_sizes = len(self.bins)
        self.cost_list = []
        sum_weight=0
        self.bin_losses = []
        saturation_loss = []

        # Create all bins and calculate losses for them

        with vs.variable_scope("var_lengths"):
            for seqLength,itemCount, ind in zip(self.bins, self.count_list, range(num_sizes)):
                x_in = tf.placeholder("int32", [itemCount, seqLength])
                y_in = tf.placeholder("int64", [itemCount, seqLength])
                self.x_input.append(x_in)
                self.y_input.append(y_in)
                self.saturation_costs = []
                c, a, _, _, perItemCost, _ = self.createLoss(x_in,y_in,seqLength)

                weight = 1.0#/seqLength
                sat_cost = tf.add_n(self.saturation_costs) / ((seqLength ** 2) * itemCount)
                saturation_loss.append(sat_cost*weight)
                self.bin_losses.append(perItemCost)
                self.base_cost += c * weight
                sum_weight+=weight
                self.accuracy += a
                self.cost_list.append(c)
                tf.get_variable_scope().reuse_variables()

        # calculate the total loss
        self.base_cost /= sum_weight
        self.accuracy /= num_sizes

        self.sat_loss = tf.reduce_sum(tf.stack(saturation_loss))*self.saturation_weight / sum_weight
        cost = self.base_cost + self.sat_loss

        # add gradient noise proportional to learning rate
        tvars = tf.trainable_variables()
        grads_0 = tf.gradients(cost, tvars)

        grads = []
        for grad in grads_0:
                grad1 = grad+tf.truncated_normal(tf.shape(grad)) * self.learning_rate*1e-4
                grads.append(grad1)

        # optimizer
        optimizer = AdamaxOptimizer(self.learning_rate, beta1=0.9, beta2 = 1.0-self.beta2_rate, epsilon=1e-8)
        self.optimizer = optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step)

        # some values for printout
        max_vals=[]

        for var in tvars:
            varV = optimizer.get_slot(var, "m")
            max_vals.append(varV)

        self.gnorm = tf.global_norm(max_vals)
        self.cost_list = tf.stack(self.cost_list)
q2_linear.py (project: cs234_reinforcement_learning, author: hbghhy)
def add_optimizer_op(self, scope):
        """
        Set self.train_op and self.grad_norm
        """

        ##############################################################
        """
        TODO: 1. get Adam Optimizer (remember that we defined self.lr in the placeholders
                section)
              2. compute grads wrt to variables in scope for self.loss
              3. clip the grads by norm with self.config.clip_val if self.config.grad_clip
                is True
              4. apply the gradients and store the train op in self.train_op
               (sess.run(train_op) must update the variables)
              5. compute the global norm of the gradients and store this scalar
                in self.grad_norm

        HINT: you may find the following functinos useful
            - tf.get_collection
            - optimizer.compute_gradients
            - tf.clip_by_norm
            - optimizer.apply_gradients
            - tf.global_norm

             you can access config variable by writing self.config.variable_name

        (be sure that you set self.train_op and self.grad_norm)
        """
        ##############################################################
        #################### YOUR CODE HERE - 8-12 lines #############

        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
        gradients, v = list(zip(*optimizer.compute_gradients(self.loss, variables)))

        if self.config.grad_clip:
            gradients, _ = tf.clip_by_global_norm(gradients, self.config.clip_val)

        # Use the clipped gradients for optimization
        self.grad_norm = tf.global_norm(gradients)
        self.train_op = optimizer.apply_gradients(list(zip(gradients, v)))

        ##############################################################
        ######################## END YOUR CODE #######################
A3C.py (project: CartPole-v0, author: hmtai6)
def _createModel(self):
       with tf.variable_scope(self.scope):
           self.inputs = tf.placeholder('float', shape=[None,self.stateSize])
           x1 = slim.fully_connected(
               self.inputs,
               64,
               scope='fc/fc_1',
               activation_fn=tf.nn.relu)

           self.policy = slim.fully_connected(x1, self.actionSize,
               activation_fn=tf.nn.softmax,
               weights_initializer=Brian.normalized_columns_initializer(0.01),
               biases_initializer=None)
           self.value = slim.fully_connected(x1,1,
               activation_fn=None,
               weights_initializer=Brian.normalized_columns_initializer(1.0),
               biases_initializer=None)

           self.update_local_ops = Brian.update_target_graph('global',self.scope)

           if self.scope != 'global':
               self.actions = tf.placeholder( shape=[None], dtype=tf.int32)
               self.actions_onehot = tf.one_hot(self.actions, self.actionSize, dtype=tf.float32)
               self.target_v = tf.placeholder(shape=[None],dtype=tf.float32)
               self.advantages = tf.placeholder(shape=[None],dtype=tf.float32)

               self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])

               #Loss functions
               self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.target_v - tf.reshape(self.value,[-1])))
               self.entropy = - tf.reduce_sum(self.policy * tf.log(self.policy))
               self.policy_loss = -tf.reduce_sum(tf.log(self.responsible_outputs)*self.advantages)
               self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

               #Get gradients from local network using local losses
               local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
               self.gradients = tf.gradients(self.loss,local_vars)
               self.var_norms = tf.global_norm(local_vars)
               grads,self.grad_norms = tf.clip_by_global_norm(self.gradients,40.0)

               #Apply local gradients to global network
               global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
               self.apply_grads = self.trainer.apply_gradients(zip(grads,global_vars))

