Python examples of clip_by_global_norm()

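Every snippet on this page follows the same pattern: compute gradients, clip their global norm with tf.clip_by_global_norm, then apply the clipped gradients. Below is a minimal, self-contained sketch of that pattern (the toy linear model and the clip value of 5.0 are illustrative, not taken from any of the projects listed), using the TensorFlow 1.x graph API that all of these examples assume:

import tensorflow as tf

# Minimal sketch, assuming the TensorFlow 1.x graph API used throughout this page.
x = tf.placeholder(tf.float32, [None, 4], name='x')
y = tf.placeholder(tf.float32, [None, 1], name='y')
w = tf.Variable(tf.random_normal([4, 1]), name='w')
b = tf.Variable(tf.zeros([1]), name='b')
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) + b - y))

# The pattern shared by the snippets below:
# 1) compute gradients, 2) clip their global norm, 3) apply the clipped gradients.
tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
clipped_grads, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(zip(clipped_grads, tvars))

tf.clip_by_global_norm(t_list, clip_norm) returns the clipped list together with the global norm of the original tensors; if that norm is already at or below clip_norm the gradients pass through unchanged, otherwise every gradient is scaled by clip_norm / global_norm.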
base.py (project: a3c-tensorflow, author: carpedm20)
def build_shared_grad(self):
    self.grads = tf.gradients(self.loss, self.local_network.var_list)

    clipped_grads, _ = tf.clip_by_global_norm(self.grads, self.config.max_grad_norm)

    # copy weights from the parameter server to the local model
    self.sync = tf.group(*[v1.assign(v2) for v1, v2 in zip(self.local_network.var_list, self.network.var_list)])

    grads_and_vars = list(zip(clipped_grads, self.network.var_list))
    inc_step = self.global_step.assign_add(tf.shape(self.local_network.x)[0])

    # each worker has a different set of adam optimizer parameters
    self.lr = tf.train.exponential_decay(
            self.config.lr_start, self.global_step, self.config.lr_decay_step,
            self.config.lr_decay_rate, staircase=True, name='lr')

    opt = tf.train.AdamOptimizer(self.lr)
    self.train_op = tf.group(opt.apply_gradients(grads_and_vars), inc_step)
    self.summary_writer = None
    self.local_steps = 0

    self.build_summary()
optimizers.py (project: mist-rnns, author: rdipietro)
def clip(grads_and_vars, max_global_norm):
  """ Clip the gradients that are returned from a TensorFlow Optimizer.

  Note that "clipping" is something of a misnomer here: if the global norm of
  all gradients concatenated does not exceed `max_global_norm`, they are left
  unmodified. If it does exceed `max_global_norm`, all gradients are rescaled
  globally so that the new global norm equals `max_global_norm`.

  Args:
    grads_and_vars: A list of `(grad, var)` pairs.
    max_global_norm: A float.

  Returns:
    A list of `(grad, var)` pairs with clipped gradients.
  """

  grads, vars = zip(*grads_and_vars)
  grads, _ = tf.clip_by_global_norm(grads, clip_norm=max_global_norm)
  grads_and_vars = list(zip(grads, vars))
  return grads_and_vars
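For illustration, a hedged usage sketch of the clip() helper above (the optimizer, `loss`, and the threshold of 5.0 are assumptions, not part of the mist-rnns code); it slots between compute_gradients and apply_gradients:

# Illustrative usage of clip(); `loss` is assumed to be defined elsewhere.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
grads_and_vars = clip(grads_and_vars, max_global_norm=5.0)
train_op = optimizer.apply_gradients(grads_and_vars)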
graph.py (project: multi-task-learning, author: jg8610)
def _training(self, loss, config):
        """Sets up training ops

        Creates the optimiser

        The op returned from this is what is passed to session run

            Args:
                loss float
                learning_rate float

            returns:

            Op for training
        """
        # Create the gradient descent optimizer with the
        # given learning rate.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.apply_gradients(zip(grads, tvars))
        return train_op
trainer.py (project: RNNVis, author: myaooo)
def get_gradient_clipper(clipper, *args, **kwargs):
    """
    Simple helper to get Gradient Clipper
    E.g: clipper = get_gradient_clipper('value', value_min, value_max, name='ValueClip')
    :param clipper: a string denoting TF Gradient Clipper (e.g. "global_norm", denote tf.clip_by_global_norm)
        or a function of type f(tensor) -> clipped_tensor
    :param args: used to create the clipper
    :param kwargs: used to create the clipper
    :return: a function (tensor) -> (clipped tensor)
    """
    if callable(clipper):
        return clipper
    # Special-case the global_norm clipper, since tf.clip_by_global_norm returns two values, the second being the global norm as a scalar tensor
    if clipper == 'global_norm':
        return lambda t_list: tf.clip_by_global_norm(t_list, *args, **kwargs)[0]
    if clipper in _str2clipper:
        clipper = _str2clipper[clipper]
    else:
        raise ValueError('clipper should be a callable function or a given key in _str2clipper!')
    return lambda t_list: [clipper(t, *args, **kwargs) for t in t_list]
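A brief, illustrative usage sketch (the gradient list `grads` and the clip value are assumptions; _str2clipper is defined elsewhere in the RNNVis trainer):

# Illustrative only: build a global-norm clipper and apply it to a list of gradients.
clipper = get_gradient_clipper('global_norm', clip_norm=5.0)
clipped_grads = clipper(grads)  # `grads` is assumed to be a list of gradient tensors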
seq2seq_attention_model.py (project: dialog_research, author: wjbianjason)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    hps = self._hps

    self._lr_rate = tf.maximum(
        hps.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98))

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus-1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), hps.max_grad_norm)
    tf.scalar_summary('global_norm', global_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr_rate)
    tf.scalar_summary('learning rate', self._lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
disc_model.py (project: TextGAN, author: AustinStoneProjects)
def attach_cost(self, gen_model):
        # TODO: Shouldn't dynamic RNN be used here?
        # output_text, states_text = rnn.rnn(cell, inputs, initial_state=self.initial_state)
        predicted_classes_text = self.discriminate_text(self.input_data_text)
        self.loss_text = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(predicted_classes_text, np.ones((self.args.batch_size, 1), dtype=np.float32)))
        generated_wv = gen_model.generate()
        predicted_classes_wv = self.discriminate_wv(generated_wv)
        self.loss_gen = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(predicted_classes_wv, np.zeros((self.args.batch_size, 1), dtype=np.float32)))
        self.loss = .5 * self.loss_gen + .5 * self.loss_text
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
            self.args.grad_clip)
        # optimize only the variables owned by the discriminator
        g_and_v = [(g, v) for g, v in zip(grads, tvars) if v.name.startswith('DISC')]
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(g_and_v)
decomposable.py (project: multiffn-nli, author: erickrf)
def _create_training_tensors(self, optimizer_algorithm):
        """
        Create the tensors used for training
        """
        with tf.name_scope('training'):
            if optimizer_algorithm == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(self.learning_rate)
            elif optimizer_algorithm == 'adam':
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
            elif optimizer_algorithm == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(self.learning_rate)
            else:
                raise ValueError('Unknown optimizer: %s' % optimizer_algorithm)

            gradients, v = zip(*optimizer.compute_gradients(self.loss))
            if self.clip_value is not None:
                gradients, _ = tf.clip_by_global_norm(gradients,
                                                      self.clip_value)
            self.train_op = optimizer.apply_gradients(zip(gradients, v))
tacotron.py (project: Tacotron, author: barronalex)
def add_train_op(self, loss):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        opt = tf.train.AdamOptimizer(learning_rate=self.lr)

        gradients, variables = zip(*opt.compute_gradients(loss))
        # save selected gradient summaries
        #for grad in gradients:
            #if 'BasicDecoder' in grad.name or 'gru_cell' in grad.name or 'highway_3' in grad.name:
                #tf.summary.scalar(grad.name, tf.reduce_sum(grad))

        # optionally cap and noise gradients to regularize
        if self.config.cap_grads > 0:
            with tf.variable_scope('cap_grads'):
                tf.summary.scalar('global_gradient_norm', tf.global_norm(gradients))
                gradients, _ = tf.clip_by_global_norm(gradients, self.config.cap_grads)

        train_op = opt.apply_gradients(zip(gradients, variables), global_step=self.global_step)
        return train_op
highway_uniform.py (project: tensorflow-input-pipelines, author: ischlag)
def _build_train_op(self):
    """Build training specific ops for the graph."""
    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
    tf.scalar_summary(self.mode + '/learning rate', self.lrn_rate)

    trainable_variables = tf.trainable_variables()
    grads = tf.gradients(self.cost, trainable_variables)

    if self.hps.optimizer == 'sgd':
      optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
    elif self.hps.optimizer == 'mom':
      #optimizer = tf.train.AdamOptimizer(0.001)
      #ooptimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9, use_nesterov=True)
      optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

    clipped_grads, _ = tf.clip_by_global_norm(grads, 1)
    apply_op = optimizer.apply_gradients(
        zip(clipped_grads, trainable_variables),
        global_step=self.global_step, name='train_step')

    train_ops = [apply_op] + self._extra_train_ops + tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    self.train_op = tf.group(*train_ops)
train.py (project: tensorflow, author: sliderSun)
def train_neural_network():
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])
    loss = tf.nn.seq2seq.sequence_loss_by_example([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)], len(words))
    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver(tf.all_variables())

        for epoch in range(50):
            sess.run(tf.assign(learning_rate, 0.002 * (0.97 ** epoch)))
            n = 0
            for batche in range(n_chunk):
                train_loss, _ , _ = sess.run([cost, last_state, train_op], feed_dict={input_data: x_batches[n], output_targets: y_batches[n]})
                n += 1
                print(epoch, batche, train_loss)
            if epoch % 7 == 0:
                saver.save(sess, 'poetry.module', global_step=epoch)
base.py (project: polyaxon, author: polyaxon)
def _clip_gradients_fn(self, grads_and_vars):
        """Clips gradients by global norm."""
        gradients, variables = zip(*grads_and_vars)
        self._grads_and_vars = grads_and_vars

        if self._clip_gradients > 0.0:
            clipped_gradients, _ = tf.clip_by_global_norm(
                t_list=gradients, clip_norm=self._clip_gradients)
            grads_and_vars = list(zip(clipped_gradients, variables))
        if self._clip_embed_gradients > 0.0:
            clipped_gradients = []
            variables = []
            for gradient, variable in grads_and_vars:
                if "embedding" in variable.name or "Embedding" in variable.name:
                    tmp = tf.clip_by_norm(t=gradient.values, clip_norm=self._clip_embed_gradients)
                    gradient = tf.IndexedSlices(tmp, gradient.indices, gradient.dense_shape)
                clipped_gradients.append(gradient)
                variables.append(variable)
            grads_and_vars = list(zip(clipped_gradients, variables))
        return grads_and_vars
graph_definition.py (project: skiprnn-2017-telecombcn, author: imatge-upc)
def compute_gradients(loss, learning_rate, gradient_clipping=-1):
    """
    Create optimizer, compute gradients and (optionally) apply gradient clipping
    """
    opt = tf.train.AdamOptimizer(learning_rate)
    if gradient_clipping > 0:
        vars_to_optimize = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, vars_to_optimize), clip_norm=gradient_clipping)
        grads_and_vars = list(zip(grads, vars_to_optimize))
    else:
        grads_and_vars = opt.compute_gradients(loss)
    return opt, grads_and_vars
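A short usage sketch (the `loss` tensor and the values passed here are assumptions, not part of the skiprnn code):

# Illustrative usage: clip to a global norm of 5.0, then apply the gradients.
opt, grads_and_vars = compute_gradients(loss, learning_rate=1e-4, gradient_clipping=5.0)
train_op = opt.apply_gradients(grads_and_vars)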
model_base.py (project: seq2seq, author: google)
def _clip_gradients(self, grads_and_vars):
    """Clips gradients by global norm."""
    gradients, variables = zip(*grads_and_vars)
    clipped_gradients, _ = tf.clip_by_global_norm(
        gradients, self.params["optimizer.clip_gradients"])
    return list(zip(clipped_gradients, variables))
critic_network.py (project: RickLiuGC, author: liuruijin17)
def TrainingOp(loss, dataSetSize, batch_size, max_grad_norm):

    var_list = tf.trainable_variables()
    grads = tf.gradients(loss, var_list)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    global_step = tf.get_variable(
        'global_step', [], initializer=tf.constant_initializer(0), trainable=False
    )
    training_steps_per_epoch = dataSetSize // batch_size
    learning_rate = tf.train.exponential_decay(
        1e-3, global_step, training_steps_per_epoch, 0.999,staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, var_list), global_step=global_step)

    return train_op, learning_rate
ops.py (project: photinia, author: XoriieInpottn)
def clip_gradient(pair_list,
                  max_norm):
    """Perform gradient clipping.
    If the gradients' global norm exceeds 'max_norm', rescale them so that it becomes 'max_norm'.

    :param pair_list: (grad, var) pair list.
    :param max_norm: The max global norm.
    :return: (grad, var) pair list, the original gradients' norm, the clipped gradients' norm
    """
    grad_list = [grad for grad, _ in pair_list]
    grad_list, raw_grad = tf.clip_by_global_norm(grad_list, max_norm)
    grad = tf.global_norm(grad_list)
    pair_list = [(grad, pair[1]) for grad, pair in zip(grad_list, pair_list)]
    return pair_list, raw_grad, grad
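As an illustrative follow-up (the optimizer `opt`, the `loss` tensor, and the summary names are assumptions, not part of photinia): the two returned norms are convenient for monitoring how often clipping actually kicks in.

# Illustrative usage: clip, log both norms, then apply the clipped gradients.
pair_list, raw_norm, clipped_norm = clip_gradient(opt.compute_gradients(loss), max_norm=5.0)
tf.summary.scalar('grad_norm/raw', raw_norm)
tf.summary.scalar('grad_norm/clipped', clipped_norm)
train_op = opt.apply_gradients(pair_list)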
language_model.py (project: lm, author: rafaljozefowicz)
def _backward(self, loss, summaries=False):
        hps = self.hps

        loss = loss * hps.num_steps

        emb_vars = find_trainable_variables("emb")
        lstm_vars = find_trainable_variables("LSTM")
        softmax_vars = find_trainable_variables("softmax")

        all_vars = emb_vars + lstm_vars + softmax_vars
        grads = tf.gradients(loss, all_vars)
        orig_grads = grads[:]
        emb_grads = grads[:len(emb_vars)]
        grads = grads[len(emb_vars):]
        for i in range(len(emb_grads)):
            assert isinstance(emb_grads[i], tf.IndexedSlices)
            emb_grads[i] = tf.IndexedSlices(emb_grads[i].values * hps.batch_size, emb_grads[i].indices,
                                            emb_grads[i].dense_shape)

        lstm_grads = grads[:len(lstm_vars)]
        softmax_grads = grads[len(lstm_vars):]

        lstm_grads, lstm_norm = tf.clip_by_global_norm(lstm_grads, hps.max_grad_norm)
        clipped_grads = emb_grads + lstm_grads + softmax_grads
        assert len(clipped_grads) == len(orig_grads)

        if summaries:
            tf.scalar_summary("model/lstm_grad_norm", lstm_norm)
            tf.scalar_summary("model/lstm_grad_scale", tf.minimum(hps.max_grad_norm / lstm_norm, 1.0))
            tf.scalar_summary("model/lstm_weight_norm", tf.global_norm(lstm_vars))
            # for v, g, cg in zip(all_vars, orig_grads, clipped_grads):
            #     name = v.name.lstrip("model/")
            #     tf.histogram_summary(name + "/var", v)
            #     tf.histogram_summary(name + "/grad", g)
            #     tf.histogram_summary(name + "/clipped_grad", cg)

        return list(zip(clipped_grads, all_vars))
kfac.py (project: baselines, author: openai)
def apply_gradients(self, grads):
        coldOptim = tf.train.MomentumOptimizer(
            self._cold_lr, self._momentum)

        def coldSGDstart():
            sgd_grads, sgd_var = zip(*grads)

            if self.max_grad_norm is not None:
                sgd_grads, sgd_grad_norm = tf.clip_by_global_norm(sgd_grads,self.max_grad_norm)

            sgd_grads = list(zip(sgd_grads,sgd_var))

            sgd_step_op = tf.assign_add(self.sgd_step, 1)
            coldOptim_op = coldOptim.apply_gradients(sgd_grads)
            if KFAC_DEBUG:
                with tf.control_dependencies([sgd_step_op, coldOptim_op]):
                    sgd_step_op = tf.Print(
                        sgd_step_op, [self.sgd_step, tf.convert_to_tensor('doing cold sgd step')])
            return tf.group(*[sgd_step_op, coldOptim_op])

        kfacOptim_op, qr = self.apply_gradients_kfac(grads)

        def warmKFACstart():
            return kfacOptim_op

        return tf.cond(tf.greater(self.sgd_step, self._cold_iter), warmKFACstart, coldSGDstart), qr
pointer_net.py (project: ReLiefParser, author: XuezheMax)
def __call__(self, enc_input, dec_input_indices, valid_indices, left_indices, right_indices, values, valid_masks=None):
        batch_size = tf.shape(enc_input)[0]
        # forward computation graph
        with tf.variable_scope(self.scope):
            # encoder output
            enc_memory, enc_final_state_fw, _ = self.encoder(enc_input)

            # decoder
            dec_hiddens, dec_actions, dec_act_logps = self.decoder(
                                                            enc_memory, dec_input_indices, 
                                                            valid_indices, left_indices, right_indices,
                                                            valid_masks, init_state=enc_final_state_fw)

            # cost
            costs = []
            update_ops = []
            for step_idx, (act_logp, value, baseline) in enumerate(zip(dec_act_logps, values, self.baselines)):
                # costs.append(-tf.reduce_mean(act_logp * (value - baseline)))
                new_baseline = self.bl_ratio * baseline + (1-self.bl_ratio) * tf.reduce_mean(value)
                costs.append(-tf.reduce_mean(act_logp * value))
                update_ops.append(tf.assign(baseline, new_baseline))

        # gradient computation graph
        self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope)
        train_ops = []
        for limit in self.buckets:
            print '0 ~ %d' % (limit-1)
            grad_params = tf.gradients(tf.reduce_sum(tf.pack(costs[:limit])), self.params)
            if self.max_grad_norm is not None:
                clipped_gradients, norm = tf.clip_by_global_norm(grad_params, self.max_grad_norm)
            else:
                clipped_gradients = grad_params
            train_op = self.optimizer.apply_gradients(
                            zip(clipped_gradients, self.params))
            with tf.control_dependencies([train_op] + update_ops[:limit]):
                # train_ops.append(tf.Print(tf.constant(1.), [norm]))
                train_ops.append(tf.constant(1.))

        return dec_hiddens, dec_actions, train_ops

#### test script
transducer_model.py (project: segmenter, author: yanshao9798)
def define(self, char_num, rnn_dim, emb_dim, max_x, max_y, write_trans_model=True):
        self.decode_step = max_y
        self.encode_step = max_x
        self.en_vec = [tf.placeholder(tf.int32, [None], name='en_input' + str(i)) for i in range(max_x)]
        self.trans_labels = [tf.placeholder(tf.int32, [None], name='de_input' + str(i)) for i in range(max_y)]
        weights = [tf.cast(tf.sign(ot_t), tf.float32) for ot_t in self.trans_labels]
        self.de_vec = [tf.zeros_like(self.trans_labels[0], tf.int32)] + self.trans_labels[:-1]
        self.feed_previous = tf.placeholder(tf.bool)
        self.trans_l_rate = tf.placeholder(tf.float32, [], name='learning_rate')
        seq_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_dim, state_is_tuple=True)
        self.trans_output, states = seq2seq.embedding_attention_seq2seq(self.en_vec, self.de_vec, seq_cell, char_num,
                                                                        char_num, emb_dim, feed_previous=self.feed_previous)

        loss = seq2seq.sequence_loss(self.trans_output, self.trans_labels, weights)
        optimizer = tf.train.AdagradOptimizer(learning_rate=self.trans_l_rate)

        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, 5.0)
        self.trans_train = optimizer.apply_gradients(zip(clipped_gradients, params))

        self.saver = tf.train.Saver()

        if write_trans_model:
            param_dic = {}
            param_dic['char_num'] = char_num
            param_dic['rnn_dim'] = rnn_dim
            param_dic['emb_dim'] = emb_dim
            param_dic['max_x'] = max_x
            param_dic['max_y'] = max_y
            # print param_dic
            f_model = open(self.trained + '_model', 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()
fp.py (project: cancer, author: yancz1989)
def build(H, dat, sess):
  with open(META_DIR + 'fp.json') as fpj:
    meta = json.load(fpj)
  bsize = H['batch_size']
  x = tf.placeholder(tf.float32, shape = [64, 64, 1])
  y = tf.placeholder(tf.float32, shape = [1,])
  training = tf.placeholder(tf.bool)

  fptrunk = FPTrunk(dat, x, y, bsize, sess)
  Xt, Yt = tf.train.batch(fptrunk.q['train'].dequeue(), batch_size = bsize, capacity = bsize)
  Xv, Yv = tf.train.batch(fptrunk.q['valid'].dequeue(), batch_size = bsize, capacity = bsize)

  logits, preds = model(H, Xt, training)
  loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
      logits=logits, labels=tf.cast(Yt, tf.float32)))
  varst = tf.trainable_variables()
  gstep = tf.Variable(0, trainable = False)
  opts = {
      'RMS': tf.train.RMSPropOptimizer,
      'Adam': tf.train.AdamOptimizer,
      'SGD': tf.train.GradientDescentOptimizer,
      'Adagrad': tf.train.AdagradOptimizer
    }
  opt = opts[H['opt']](learning_rate = H['lr'])
  grads_vars = opt.compute_gradients(loss, varst)
  grads = [gv[0] for gv in grads_vars]
  vars = [gv[1] for gv in grads_vars]
  capped, norm = tf.clip_by_global_norm(grads, H['norm_clip'])
  train_opt = opt.apply_gradients([(capped[i], vars[i]) for i in range(len(vars))], global_step = gstep)

  saver = tf.train.Saver(max_to_keep = None)
  return (x, y, training, Xt, Yt, Xv, Yv, logits, loss, preds, opt, varst, gstep, train_opt, saver, fptrunk)

