def get_train_op(self):
"""
define optimization operation
"""
if self.args.optimizer == "SGD":
optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.args.lr)
elif self.args.optimizer == "ADAM":
optimizer = tf.train.AdamOptimizer(learning_rate=self.args.lr)
else:
raise NotImplementedError("Other Optimizer Not Implemented.-_-||")
# gradient clip
grad_vars = optimizer.compute_gradients(self.loss)
grad_vars = [
(tf.clip_by_norm(grad, self.args.grad_clipping), var)
if grad is not None else (grad, var)
for grad, var in grad_vars]
self.train_op = optimizer.apply_gradients(grad_vars, self.step)
return
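The method above clips each gradient tensor separately. A common alternative, sketched below under the assumption that the same optimizer, self.loss, self.args.grad_clipping and self.step are available, is tf.clip_by_global_norm, which rescales all gradients jointly so that their combined norm stays under the threshold:
# Sketch of global-norm clipping as an alternative to per-tensor tf.clip_by_norm;
# assumes the same optimizer, self.loss, self.args and self.step as above.
grads, tvars = zip(*optimizer.compute_gradients(self.loss))
clipped_grads, _ = tf.clip_by_global_norm(grads, self.args.grad_clipping)
self.train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)), global_step=self.step)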
Example source code using Python's tf.clip_by_norm()
def build_model(self):
self.build_memory()
self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
z = tf.matmul(self.hid[-1], self.W)
self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)
self.lr = tf.Variable(self.current_lr)
self.opt = tf.train.GradientDescentOptimizer(self.lr)
params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
grads_and_vars = self.opt.compute_gradients(self.loss, params)
clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1]) \
for gv in grads_and_vars]
inc = self.global_step.assign_add(1)
with tf.control_dependencies([inc]):
self.optim = self.opt.apply_gradients(clipped_grads_and_vars)
tf.global_variables_initializer().run()  # replaces the deprecated tf.initialize_all_variables()
self.saver = tf.train.Saver()
def clip_gradient_norms(gradients_to_variables, max_norm):
"""Clips the gradients by the given value.
Args:
gradients_to_variables: A list of gradient to variable pairs (tuples).
max_norm: the maximum norm value.
Returns:
A list of clipped gradient to variable pairs.
"""
clipped_grads_and_vars = []
for grad, var in gradients_to_variables:
if grad is not None:
if isinstance(grad, tf.IndexedSlices):
tmp = tf.clip_by_norm(grad.values, max_norm)
grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
else:
grad = tf.clip_by_norm(grad, max_norm)
clipped_grads_and_vars.append((grad, var))
return clipped_grads_and_vars
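A hypothetical usage sketch for clip_gradient_norms (loss, global_step and the learning rate are placeholders, not taken from the snippets here): the helper sits between compute_gradients and apply_gradients, and the tf.IndexedSlices branch keeps sparse embedding gradients sparse while still clipping their values.
# Hypothetical usage; `loss` and `global_step` are assumed to be defined elsewhere.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
grads_and_vars = clip_gradient_norms(grads_and_vars, max_norm=5.0)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)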
def _clip_grad_norms(self, gradients_to_variables, max_norm=5):
"""Clips the gradients by the given value.
Args:
gradients_to_variables: A list of gradient to variable pairs (tuples).
max_norm: the maximum norm value.
Returns:
A list of clipped gradient to variable pairs.
"""
grads_and_vars = []
for grad, var in gradients_to_variables:
if grad is not None:
if isinstance(grad, tf.IndexedSlices):
tmp = tf.clip_by_norm(grad.values, max_norm)
grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
else:
grad = tf.clip_by_norm(grad, max_norm)
grads_and_vars.append((grad, var))
return grads_and_vars
def set_train_op(loss, tvars):
if FLAGS.optimizer_type == "sgd":
optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
elif FLAGS.optimizer_type == "rmsprop":
optimizer = tf.train.RMSPropOptimizer(learning_rate=FLAGS.learning_rate)
elif FLAGS.optimizer_type == "adam":
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
else:
raise ValueError("Wrong optimizer_type.")
gradients = optimizer.compute_gradients(loss, var_list=tvars)
clipped_gradients = [(grad if grad is None else tf.clip_by_norm(grad, FLAGS.max_grads), var)
for grad, var in gradients]
train_op = optimizer.apply_gradients(clipped_gradients)
return train_op
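set_train_op reads its configuration from tf.app.flags; a minimal sketch of the flag definitions it assumes follows (the default values here are illustrative, not from the original project).
# Assumed flag definitions for set_train_op above; defaults are illustrative.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("optimizer_type", "adam", "One of: sgd, rmsprop, adam")
tf.app.flags.DEFINE_float("learning_rate", 0.001, "Learning rate")
tf.app.flags.DEFINE_float("max_grads", 5.0, "Per-variable gradient norm clip value")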
def _clip_grad_norms(gradients_to_variables, max_norm=10):
"""Clips the gradients by the given value.
Args:
gradients_to_variables: A list of gradient to variable pairs (tuples).
max_norm: the maximum norm value.
Returns:
A list of clipped gradient to variable pairs.
"""
grads_and_vars = []
for grad, var in gradients_to_variables:
if grad is not None:
if isinstance(grad, tf.IndexedSlices):
tmp = tf.clip_by_norm(grad.values, max_norm)
grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
else:
grad = tf.clip_by_norm(grad, max_norm)
grads_and_vars.append((grad, var))
return grads_and_vars
def _clip_sparse(self, grad, var):
assert isinstance(grad, tf.IndexedSlices)
clip_dims = self._vars_to_clip_dims[var]
if 0 in clip_dims:
log.warn("Clipping norm across dims %s for %s is inefficient "
"when including sparse dimension 0.", clip_dims,
var.op.name)
return self._clip_dense(var)
with tf.colocate_with(var):
var_subset = tf.gather(var, grad.indices)
with self._maybe_colocate_with(var):
normalized_var_subset = tf.clip_by_norm(
var_subset, self._max_norm, clip_dims)
delta = tf.IndexedSlices(
var_subset - normalized_var_subset, grad.indices, grad.dense_shape)
with tf.colocate_with(var):
return var.scatter_sub(delta, use_locking=self._use_locking)
def set_up_optimizer(loss, optimizer, params, clip_gradients):
opt = {
'adam': tf.train.AdamOptimizer,
'sgd': tf.train.GradientDescentOptimizer,
'momentum': tf.train.MomentumOptimizer,
'adadelta': tf.train.AdadeltaOptimizer,
'adagrad': tf.train.AdagradOptimizer,
'rmsprop': tf.train.RMSPropOptimizer
}[optimizer](**params)
# optionally clip gradients by norm
grads_and_vars = opt.compute_gradients(loss)
if clip_gradients is not None:
grads_and_vars = [(tf.clip_by_norm(grad, clip_gradients) if grad is not None else grad, var)
for grad, var in grads_and_vars]
return opt, opt.apply_gradients(grads_and_vars)
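A hypothetical call to set_up_optimizer: the params dict is forwarded as keyword arguments to the selected optimizer constructor, so its keys must match that constructor's signature (the learning_rate value and the loss below are only placeholders).
# Hypothetical usage; `loss` is assumed to be defined elsewhere in the graph.
opt, train_op = set_up_optimizer(loss, 'adam', {'learning_rate': 1e-3}, clip_gradients=5.0)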
def apply_gradients(self, var_list, accum_grad_list, name=None):
update_ops = []
with tf.device(self._device):
with tf.control_dependencies(None):
self._create_slots(var_list)
with tf.name_scope(name, self._name, []) as name:
self._prepare()
for var, accum_grad in zip(var_list, accum_grad_list):
with tf.name_scope("update_" + var.op.name), tf.device(var.device):
clipped_accum_grad = tf.clip_by_norm(accum_grad, self._clip_norm)
update_ops.append(self._apply_dense(clipped_accum_grad, var))
return update_ops
#return tf.group(*update_ops, name=name)
def build_train(self, total_loss):
with self.G.as_default():
self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
# can't use opt.minimize because we need to clip the gradients
grads_and_vars = self.opt.compute_gradients(total_loss)
grads_and_vars = [(tf.clip_by_norm(g, self.max_grad_norm), v) for g,v in grads_and_vars]
grads_and_vars = [(add_gradient_noise(g), v) for g,v in grads_and_vars]
nil_grads_and_vars = []
for g, v in grads_and_vars:
if v.name in self.nil_vars:
nil_grads_and_vars.append((zero_nil_slot(g), v))
else:
nil_grads_and_vars.append((g, v))
self.train_op = self.opt.apply_gradients(nil_grads_and_vars, name="train_op")
return self.train_op
def minimize_and_clip(optimizer, objective, var_list=None, clip_val=10, exclude=None):
"""
Minimize `objective` using `optimizer` w.r.t. variables in
`var_list`, while ensuring that the norm of the gradient for each
variable is clipped to `clip_val`.
"""
gradients = optimizer.compute_gradients(objective, var_list=var_list)
for i, (grad, var) in enumerate(gradients):
if grad is not None:
#gradients[i] = (tf.clip_by_value(grad, -clip_val, clip_val), var)
if (exclude is None) or (var not in exclude):
gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
return optimizer.apply_gradients(gradients)
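A hypothetical usage sketch for minimize_and_clip (q_loss, q_func_vars and the optimizer are placeholders): gradients of every variable in var_list, except those listed in exclude, are clipped to clip_val before the update is applied.
# Hypothetical usage; q_loss and q_func_vars are placeholders defined elsewhere.
q_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
train_op = minimize_and_clip(q_optimizer, q_loss, var_list=q_func_vars, clip_val=10)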
############################
# Other NN Related
def __call__(self, inputs, center_state, module_state):
"""
:return: output, new_center_features, new_module_state
"""
with tf.variable_scope(self.name):
reading_weights = tf.get_variable('reading_weights',shape=[self.center_size,self.context_input_size],initializer=tf.truncated_normal_initializer(stddev=0.1))
context_input = tf.matmul(center_state, tf.clip_by_norm(reading_weights,1.0))
inputs = tf.concat([inputs, context_input], axis=1) if self.input_size else context_input
inputs = tf.contrib.layers.fully_connected(inputs, num_outputs=self.center_output_size)
gru = tf.nn.rnn_cell.GRUCell(self.num_gru_units)
gru_output, new_module_state = gru(inputs=inputs, state=module_state)
output, center_feature_output = tf.split(gru_output,
[self.output_size, self.center_output_size],
axis=1) if self.output_size else (None, gru_output)
return output, center_feature_output, new_module_state
def train_step(x_batch, y_batch):
"""
A single training step
"""
feed_dict = {
lstm.input_x: x_batch,
lstm.input_y: y_batch,
lstm.dropout_keep_prob: FLAGS.dropout_keep_prob,
lstm.batch_size: FLAGS.batch_size,
lstm.pad: np.zeros([FLAGS.batch_size, 1, FLAGS.embedding_dim, 1]),
lstm.real_len: real_len(x_batch),
}
_, step, summaries, loss, accuracy = sess.run(
[train_op, global_step, train_summary_op, lstm.loss, lstm.accuracy],
feed_dict)
#lstm.W = tf.clip_by_norm(lstm.W, 3)
print("TRAIN step {}, loss {:g}, acc {:g}".format(step, loss, accuracy))
train_summary_writer.add_summary(summaries, step)
def train_step(x_batch, y_batch):
"""
A single training step
"""
feed_dict = {
lstm.input_x: x_batch,
lstm.input_y: y_batch,
lstm.dropout_keep_prob: FLAGS.dropout_keep_prob,
lstm.batch_size: FLAGS.batch_size,
lstm.real_len: real_len(x_batch)
}
_, step, summaries, loss, accuracy = sess.run(
[train_op, global_step, train_summary_op, lstm.loss, lstm.accuracy],
feed_dict)
lstm.W = tf.clip_by_norm(lstm.W, 3)  # only rebinds the Python attribute, not the trained variable (see the sketch after this function)
time_str = datetime.datetime.now().isoformat()
print("TRAIN step {}, loss {:g}, acc {:g}".format(step, loss, accuracy))
train_summary_writer.add_summary(summaries, step)
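The lstm.W = tf.clip_by_norm(lstm.W, 3) line above only rebinds the Python attribute to a new tensor; it does not modify the variable the graph actually trains. A minimal sketch of one way to really clip the weights, assuming lstm.W is a tf.Variable, is to build an assign op once and run it after each training step:
# Built once, outside train_step: write the clipped weights back into the variable.
clip_W_op = tf.assign(lstm.W, tf.clip_by_norm(lstm.W, 3.0))
# Inside train_step, after the sess.run(...) call above:
# sess.run(clip_W_op)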
From RCNNModelWithLSTM.py in the DeeplearningForTextClassification project (author: zldeng).
def train(self):
learning_rate = tf.train.exponential_decay(self.learning_rate,
self.global_step,self.decay_steps,
self.decay_rate,staircase = True)
# use gradient clipping to handle exploding gradients
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(self.loss_val)
for idx ,(grad,var) in enumerate(grads_and_vars):
if grad is not None:
grads_and_vars[idx] = (tf.clip_by_norm(grad,self.grad_clip),var)
train_op = optimizer.apply_gradients(grads_and_vars, global_step = self.global_step)
return train_op