Python: example source code using clip_by_global_norm()

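tf.clip_by_global_norm(t_list, clip_norm) rescales a list of tensors so that their combined (global) norm is at most clip_norm, and returns both the clipped list and the global norm computed before clipping. Every example below follows the same basic pattern: compute the gradients of the loss, clip them, then apply them with an optimizer. A minimal sketch of that pattern (an illustration assuming a TensorFlow 1.x graph in which a scalar tensor named loss already exists; it is not taken from any of the projects below):

import tensorflow as tf

# assumption: `loss` is a scalar loss tensor already defined in the graph
tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)

# rescale the gradients so that their global norm is at most 5.0;
# the second return value is the global norm *before* clipping
clipped_grads, pre_clip_norm = tf.clip_by_global_norm(grads, 5.0)

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
train_op = optimizer.apply_gradients(zip(clipped_grads, tvars))

Some of the projects below start from optimizer.compute_gradients(loss), which yields (gradient, variable) pairs, instead of tf.gradients; the clipping step is identical either way.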
base_aligner.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def add_training_op(self, loss):
        #optimizer = tf.train.AdamOptimizer(self.config.lr)
        #optimizer = tf.train.AdagradOptimizer(self.config.lr)
        optclass = getattr(tf.train, self.config.optimizer + 'Optimizer')
        assert issubclass(optclass, tf.train.Optimizer)
        optimizer = optclass(self.config.learning_rate)

        gradient_var_pairs = optimizer.compute_gradients(loss)
        vars = [x[1] for x in gradient_var_pairs]
        gradients = [x[0] for x in gradient_var_pairs]
        if self.config.gradient_clip > 0:
            clipped, _ = tf.clip_by_global_norm(gradients, self.config.gradient_clip)
        else:
            clipped = gradients

        self.grad_norm = tf.global_norm(clipped)
        train_op = optimizer.apply_gradients(zip(clipped, vars))
        return train_op
character_rnn.py (project: deep-learning, author: ljanyst)
def get_training_tensors(self, learning_rate = 0.001, grad_clip = 5):
        #-----------------------------------------------------------------------
        # Build a loss function
        #-----------------------------------------------------------------------
        with tf.name_scope('targets-encode'):
            y_one_hot  = tf.one_hot(self.targets, self.n_classes)
            y_reshaped = tf.reshape(y_one_hot, self.logits.get_shape())

        with tf.name_scope('loss'):
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                           labels=y_reshaped)
            loss = tf.reduce_mean(loss)
            tf.summary.scalar('loss', loss)

        #-----------------------------------------------------------------------
        # Build the optimizer
        #-----------------------------------------------------------------------
        with tf.name_scope('optimizer'):
            tvars     = tf.trainable_variables()
            grads, _  = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                               grad_clip)
            train_op  = tf.train.AdamOptimizer(learning_rate)
            optimizer = train_op.apply_gradients(zip(grads, tvars))

        return loss, optimizer
sentiment_rnn.py (project: deep-learning, author: ljanyst)
def get_optimizer(self, learning_rate = 0.001, grad_clip = 5):
        #-----------------------------------------------------------------------
        # Build a loss function
        #-----------------------------------------------------------------------
        with tf.variable_scope('loss'):
            loss = tf.losses.mean_squared_error(self.target, self.output)

        #-----------------------------------------------------------------------
        # Build the optimizer
        #-----------------------------------------------------------------------
        with tf.variable_scope('optimizer'):
            tvars     = tf.trainable_variables()
            grads, _  = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                               grad_clip)
            train_op  = tf.train.AdamOptimizer(learning_rate)
            optimizer = train_op.apply_gradients(zip(grads, tvars))

        return optimizer, loss
caption_gen.py (project: Caption-Generation, author: m516825)
def build_model(self):
        self.model = classmap[FLAGS.model_type](hidden_size=FLAGS.hidden, 
                                    vocab_size=self.vocab_size, 
                                    encoder_in_size=self.data.feats.shape[-1], 
                                    encoder_in_length=self.data.feats.shape[1],
                                    decoder_in_length=self.data.decoder_in.shape[-1] - 1, 
                                    word2vec_weight=self.w2v_W,
                                    embedding_size=FLAGS.embedding_dim,
                                    neg_sample_num=self.sample_num,
                                    start_id=self.vocab_processor._mapping['<BOS>'],
                                    end_id=self.vocab_processor._mapping['<EOS>'],
                                    Bk=FLAGS.K)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        self.optimizer = tf.train.RMSPropOptimizer(FLAGS.lr)

        tvars = tf.trainable_variables()

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.model.cost, tvars), 5)

        self.updates = self.optimizer.apply_gradients(
                        zip(grads, tvars), global_step=self.global_step)
        self.saver = tf.train.Saver(tf.global_variables())
seq2seq_model.py (project: seq2seq, author: eske)
def get_update_op(self, loss, opts, global_step=None, max_gradient_norm=None, freeze_variables=None):
        if loss is None:
            return None

        freeze_variables = freeze_variables or []

        # compute gradient only for variables that are not frozen
        frozen_parameters = [var.name for var in tf.trainable_variables()
                             if any(re.match(var_, var.name) for var_ in freeze_variables)]
        params = [var for var in tf.trainable_variables() if var.name not in frozen_parameters]
        self.params = params

        gradients = tf.gradients(loss, params)
        if max_gradient_norm:
            gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)

        update_ops = []
        for opt in opts:
            with tf.variable_scope('gradients' if self.name is None else 'gradients_{}'.format(self.name)):
                update_op = opt.apply_gradients(list(zip(gradients, params)), global_step=global_step)

            update_ops.append(update_op)

        return update_ops
model.py (project: chatbot-generative, author: DeeChat)
def _create_optimizer(self):
        print('Create optimizer... ')
        with tf.variable_scope('training'):
            self.global_step = tf.Variable(
                0, dtype=tf.int32, trainable=False, name='global_step')

            if not self.fw_only:
                self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
                trainable_vars = tf.trainable_variables()
                self.gradient_norms = []
                self.train_ops = []
                start = time.time()
                for bucket_id in range(len(config.BUCKETS)):
                    clipped_grads, norm = tf.clip_by_global_norm(
                        tf.gradients(self.losses[bucket_id], trainable_vars),
                        config.MAX_GRAD_NORM)
                    self.gradient_norms.append(norm)
                    self.train_ops.append(self.optimizer.apply_gradients(
                        zip(clipped_grads, trainable_vars),
                        global_step=self.global_step))
                    print('Creating opt for bucket {:d} took {:.2f} seconds.'.format(
                        bucket_id, time.time() - start))
                    start = time.time()
seq2seq_model.py (project: tf-seq2seq, author: JayParks)
def init_optimizer(self):
        print("setting optimizer..")
        # Gradients and SGD update operation for training the model
        trainable_params = tf.trainable_variables()
        if self.optimizer.lower() == 'adadelta':
            self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate)
        elif self.optimizer.lower() == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        elif self.optimizer.lower() == 'rmsprop':
            self.opt = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)
        else:
            self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)

        # Compute gradients of loss w.r.t. all trainable variables
        gradients = tf.gradients(self.loss, trainable_params)

        # Clip gradients by a given maximum_gradient_norm
        clip_gradients, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm)

        # Update the model
        self.updates = self.opt.apply_gradients(
            zip(clip_gradients, trainable_params), global_step=self.global_step)
qa_model.py (project: Question-Answering, author: MurtyShikhar)
def setup_train_op(self):
        """
        Add train_op to self
        """
        with tf.variable_scope("train_step"):
            adam_optimizer = tf.train.AdamOptimizer()
            grads, vars = zip(*adam_optimizer.compute_gradients(self.loss))

            clip_val = self.config.max_gradient_norm
            # if -1 then do not perform gradient clipping
            if clip_val != -1:
                clipped_grads, _ = tf.clip_by_global_norm(grads, self.config.max_gradient_norm)
                self.global_grad = tf.global_norm(clipped_grads)
                self.gradients = zip(clipped_grads, vars)
            else:
                self.global_grad = tf.global_norm(grads)
                self.gradients = zip(grads, vars)


            self.train_op = adam_optimizer.apply_gradients(self.gradients)

        self.init = tf.global_variables_initializer()
optimizer.py (project: relaax, author: deeplearninc)
def build_graph(self, weights, loss=None, optimizer=None, norm=False, batch_size=None, grad_ys=None):
        if loss is not None:
            gradients = tf.gradients(loss.node, list(utils.Utils.flatten(weights.node)), grad_ys)
            gradients = [tf.check_numerics(g, 'gradient_%d' % i) for i, g in enumerate(gradients)]
            if batch_size is not None:
                gradients = [g / float(batch_size) for g in gradients]

            # store gradients global norm before clipping
            self.global_norm = tf.global_norm(gradients)

            # clip gradients after global norm has been stored
            if norm:
                gradients, _ = tf.clip_by_global_norm(gradients, norm)
            self.calculate = graph.TfNode(utils.Utils.reconstruct(gradients, weights.node))
        if optimizer is not None:
            self.ph_gradients = graph.Placeholders(weights)
            self.apply = graph.TfNode(optimizer.node.apply_gradients(
                    utils.Utils.izip(self.ph_gradients.checked, weights.node)))
seq2seq_attention_model.py (project: FYP-AutoTextSum, author: MrRexZ)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    hps = self._hps

    self._lr_rate = tf.maximum(
        hps.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98))

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus-1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), hps.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr_rate)
    tf.summary.scalar('learning rate', self._lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
copynet.py (project: text2text, author: google)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    config = self._config

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus - 1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), config.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)

    lr_rate = tf.maximum(
        config.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(config.lr, self.global_step,
                                   config.decay_steps, config.decay_rate))

    optimizer = tf.train.AdamOptimizer(lr_rate, epsilon=config.adam_epsilon)

    tf.summary.scalar('learning_rate', lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
bow2seq.py (project: text2text, author: google)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    config = self._config

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus - 1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), config.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)

    lr_rate = tf.maximum(
        config.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(config.lr, self.global_step, 30000, 0.98))

    if config.optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(lr_rate, epsilon=config.adam_epsilon)
    else:
      assert config.optimizer == 'gradient_descent', config.optimizer
      optimizer = tf.train.GradientDescentOptimizer(lr_rate)

    tf.summary.scalar('learning_rate', lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
seq2seq.py (project: text2text, author: google)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    config = self._config

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus - 1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), config.max_grad_norm)
    tf.summary.scalar('global_norm', global_norm)

    lr_rate = tf.maximum(
        config.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(config.lr, self.global_step, 30000, 0.98))

    if config.optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(lr_rate, epsilon=config.adam_epsilon)
    else:
      assert config.optimizer == 'gradient_descent', config.optimizer
      optimizer = tf.train.GradientDescentOptimizer(lr_rate)

    tf.summary.scalar('learning_rate', lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
model_abs.py (project: NeuralSum, author: cheng6076)
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    ''' Builds training graph. '''
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('SGD_Training'):
        # SGD learning parameter
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')

        # collect all trainable variables
        tvars = tf.trainable_variables()
        grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
model.py (project: NeuralSum, author: cheng6076)
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    ''' Builds training graph. '''
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('SGD_Training'):
        # SGD learning parameter
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')

        # collect all trainable variables
        tvars = tf.trainable_variables()
        grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
training.py (project: tefla, author: openAGI)
def clip_grad_global_norms(tvars, loss, opt, global_norm=1, gate_gradients=1, gradient_noise_scale=4.0, GATE_GRAPH=2, grad_loss=None, agre_method=None, col_grad_ops=False):
    """Clips the gradients by the given value.

    Args:
        tvars: trainable variables used for gradient updates
        loss: total loss of the network
        opt: optimizer
        global_norm: the maximum global norm

    Returns:
        A list of clipped gradient to variable pairs.
     """
    var_refs = [v.ref() for v in tvars]
    grads = tf.gradients(loss, var_refs, grad_ys=grad_loss, gate_gradients=(
        gate_gradients == 1), aggregation_method=agre_method, colocate_gradients_with_ops=col_grad_ops)
    if gradient_noise_scale > 1:
        grads = add_scaled_noise_to_gradients(
            list(zip(grads, tvars)), gradient_noise_scale=gradient_noise_scale)
    if gate_gradients == GATE_GRAPH:
        grads = tf.tuple(grads)
    grads, _ = tf.clip_by_global_norm(grads, global_norm)
    grads_and_vars = list(zip(grads, tvars))
    return grads_and_vars
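A hypothetical usage sketch of the helper above, assuming the project's TensorFlow 1.x environment and that a loss tensor named loss is already defined (gradient_noise_scale=0 is passed so the optional noise step defined elsewhere in tefla is skipped; these names are assumptions, not part of the original snippet):

tvars = tf.trainable_variables()
opt = tf.train.AdamOptimizer(1e-3)
# assumption: `loss` is the network's total loss tensor
grads_and_vars = clip_grad_global_norms(tvars, loss, opt, global_norm=5,
                                        gradient_noise_scale=0)
train_op = opt.apply_gradients(grads_and_vars)

Note that the opt argument is accepted but not used in the body shown, so the clipped (gradient, variable) pairs are applied explicitly afterwards.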
base.py (project: tefla, author: openAGI)
def _clip_grad_global_norms(self, tvars, loss, opt, global_norm=8, gate_gradients=1, gradient_noise_scale=None, GATE_GRAPH=2, grad_loss=None, agre_method=None, col_grad_ops=False):
        """Clips the gradients by the given value.

        Args:
            tvars: trainable variables used for gradient updates
            loss: total loss of the network
            opt: optimizer
            global_norm: the maximum global norm

        Returns:
            A list of clipped gradient to variable pairs.
         """
        var_refs = [v.read_value() for v in tvars]
        grads = tf.gradients(loss, var_refs, grad_ys=grad_loss, gate_gradients=(
            gate_gradients == 1), aggregation_method=agre_method, colocate_gradients_with_ops=col_grad_ops)
        if gradient_noise_scale is not None:
            grads = self._add_scaled_noise_to_gradients(
                list(zip(grads, tvars)), gradient_noise_scale=gradient_noise_scale)
        if gate_gradients == GATE_GRAPH:
            grads = tf.tuple(grads)
        grads, _ = tf.clip_by_global_norm(grads, global_norm)
        grads_and_vars = list(zip(grads, tvars))
        return grads_and_vars
model.py (project: tf-lstm-char-cnn, author: mkroutikov)
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    ''' Builds training graph. '''
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('SGD_Training'):
        # SGD learning parameter
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')

        # collect all trainable variables
        tvars = tf.trainable_variables()
        grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
model.py (project: tensorlm, author: batzner)
def _build_optimizer(self):
        """Based on the loss tensor, build an optimizer that minimizes the loss.

        This function returns an optimizer operation that updates the model's trainable parameters
        by determining the loss's gradients w.r.t. each of the trainable parameters. Specifically,
        RMSProp is used to minimize the loss. The gradients are clipped to the max_gradient_norm to
        prevent too drastic updates of the trainable parameters. See also tf.clip_by_global_norm

        Returns:
            tf.Operation: An operation that updates the model's trainable parameters.
        """

        # Clip the gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self._loss, tvars), self.max_gradient_norm)

        # Optimize the variables
        optimizer = tf.train.RMSPropOptimizer(self._learning_rate)
        return optimizer.apply_gradients(zip(grads, tvars))
TrainStockLSTM.py (project: Stocks-LSTM, author: eric574)
def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size

        self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        iw = tf.get_variable("input_w", [1, size])
        ib = tf.get_variable("input_b", [size])
        inputs = [tf.nn.xw_plus_b(i_, iw, ib) for i_ in tf.split(1, num_steps, self._input_data)]
        if is_training and config.keep_prob < 1:
            inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]

        outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

        self._output = output = tf.nn.xw_plus_b(rnn_output,
                                 tf.get_variable("out_w", [size, 1]),
                                 tf.get_variable("out_b", [1]))

        self._cost = cost = tf.reduce_mean(tf.square(output - tf.reshape(self._targets, [-1])))
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
seq2seq.py (project: fathom, author: rdadolf)
def build_train(self, losses):
    # TODO: modify total_loss to handle buckets
    self.updates = None
    with self.G.as_default():
      # Gradients and SGD update operation for training the model.
      params = tf.trainable_variables()
      if not self.forward_only:
        self.gradient_norms = []
        self.updates = []
        self.opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        for b in xrange(len(self.buckets)):
          gradients = tf.gradients(self.losses[b], params)
          clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                           self.max_gradient_norm)
          self.gradient_norms.append(norm)
          self.updates.append(self.opt.apply_gradients(
              zip(clipped_gradients, params), global_step=self.global_step))

    return self.updates # note: this is per-bucket
model.py (project: tf-char-cnn-lstm, author: hejunqing)
def training_graph(loss, learning_rate=1.0, max_grad_norm=5.0):
    ''' Builds training graph. '''
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('SGD_Training'):
        # SGD learning parameter
        learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate')

        # collect all trainable variables
        tvars = tf.trainable_variables()
        grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    return adict(
        learning_rate=learning_rate,
        global_step=global_step,
        global_norm=global_norm,
        train_op=train_op)
seq2seq_attention_model.py (project: savchenko, author: JuleLaryushina)
def _add_train_op(self):
    """Sets self._train_op, op to run for training."""
    hps = self._hps

    self._lr_rate = tf.maximum(
        hps.min_lr,  # min_lr_rate.
        tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98))

    tvars = tf.trainable_variables()
    with tf.device(self._get_gpu(self._num_gpus-1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), hps.max_grad_norm)
    tf.scalar_summary('global_norm', global_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr_rate)
    tf.scalar_summary('learning rate', self._lr_rate)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step, name='train_step')
yellowfin.py (project: MobileNet, author: Zehaos)
def apply_gradients(self, grads_tvars, global_step=None, name=None):
    self._grads, self._tvars = zip(*grads_tvars)

    with tf.variable_scope("apply_updates"):
      if self._clip_thresh_var is not None:
        self._grads_clip, self._grads_norm = tf.clip_by_global_norm(self._grads, self._clip_thresh_var)
        apply_grad_op = \
          self._optimizer.apply_gradients(zip(self._grads_clip, self._tvars) )
      else:
        apply_grad_op = \
          self._optimizer.apply_gradients(zip(self._grads, self._tvars) )


    with tf.variable_scope("after_apply"):
      after_apply_op = self.after_apply()

    with tf.variable_scope("update_hyper"):
      with tf.control_dependencies( [after_apply_op] ):
        update_hyper_op = self.update_hyper_param()

    with tf.control_dependencies([update_hyper_op] ):
      self._increment_global_step_op = tf.assign(self._global_step, self._global_step + 1)

    return tf.group(apply_grad_op, after_apply_op, update_hyper_op, self._increment_global_step_op)
model.py (project: RobotWriter, author: Moicen)
def build_optimizer(loss, learning_rate, grad_clip):
    ''' 
    Build the optimizer.

    loss: loss tensor
    learning_rate: learning rate

    '''

    # Clip the gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))

    return optimizer
policy_network.py (project: gail-driver, author: sisl)
def _create_optimizer(self, args):
        # Find negative log-likelihood of true actions
        std_a = tf.exp(self.a_logstd)
        pl_1 = 0.5 * tf.to_float(args.action_dim) * np.log(2. * np.pi)
        pl_2 = tf.to_float(args.action_dim) * tf.reduce_sum(tf.log(std_a))
        pl_3 = 0.5 * \
            tf.reduce_mean(tf.reduce_sum(
                tf.square((self.targets - self.a_mean) / std_a), 1))
        policy_loss = pl_1 + pl_2 + pl_3

        # Find overall loss
        self.cost = policy_loss
        self.summary_policy = tf.scalar_summary(
            "Policy loss", tf.reduce_mean(policy_loss))

        # Perform parameter update
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tvars), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train = optimizer.apply_gradients(zip(grads, tvars))
selector.py (project: dynamic-coattention-network, author: marshmelloX)
def _add_train_op(self):
    params = self._params

    self._lr_rate = tf.maximum(
        params.min_lr,
        tf.train.exponential_decay(params.lr, self._global_step, 30000, 0.98))

    tvars = tf.trainable_variables()
    # use reserved gpu for gradient computation
    with tf.device(self._get_gpu(self._num_gpus-1)):
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self._loss, tvars), params.max_grad_norm)
    tf.scalar_summary('global_norm', global_norm)
    optimizer = tf.train.AdamOptimizer(self._lr_rate)
    tf.scalar_summary('learning rate', self._lr_rate)
    with tf.device(self._next_device()):
      self._train_op = optimizer.apply_gradients(
          zip(grads, tvars), global_step=self._global_step, name='train_step')
    self._summaries = tf.merge_all_summaries()

    return self._train_op, self._loss,
trainer.py (project: DialogueBreakdownDetection2016, author: icoxfog417)
def set_optimizer(self, session, learning_rate=0.5, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

        self.outputs, self.losses = self.calc_loss()

        params = tf.trainable_variables()
        for b in range(len(self.buckets)):
            gradients = tf.gradients(self.losses[b], params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)

            self.gradient_norms.append(norm)
            self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
        session.run(tf.initialize_all_variables())
        if load_if_exist and self.train_dir:
            saved = tf.train.get_checkpoint_state(self.train_dir)
            if saved and tf.gfile.Exists(saved.model_checkpoint_path):
                self.saver.restore(session, saved.model_checkpoint_path)
trainer.py (project: DialogueBreakdownDetection2016, author: icoxfog417)
def set_optimizer(self, session, learning_rate=0.1, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

        self.outputs, self.losses = self.calc_loss()

        params = tf.trainable_variables()
        for b in range(len(self.buckets)):
            gradients = tf.gradients(self.losses[b], params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)

            self.gradient_norms.append(norm)
            self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
        session.run(tf.initialize_all_variables())
        if load_if_exist and self.train_dir:
            saved = tf.train.get_checkpoint_state(self.train_dir)
            if saved and tf.gfile.Exists(saved.model_checkpoint_path):
                self.saver.restore(session, saved.model_checkpoint_path)
seq2seq_trainer.py (project: DialogueBreakdownDetection2016, author: icoxfog417)
def set_optimizer(self, session, learning_rate=0.5, learning_rate_decay_factor=0.99, max_gradient_norm=5.0, load_if_exist=True):
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_opr = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

        self.outputs, self.losses = self.calc_loss()

        params = tf.trainable_variables()
        for b in range(len(self.buckets)):
            gradients = tf.gradients(self.losses[b], params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)

            self.gradient_norms.append(norm)
            self.updates.append(self.optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
        session.run(tf.initialize_all_variables())
        if load_if_exist and self.train_dir:
            saved = tf.train.get_checkpoint_state(self.train_dir)
            if saved and tf.gfile.Exists(saved.model_checkpoint_path):
                self.saver.restore(session, saved.model_checkpoint_path)

