def add_training_op(self, loss):
    #optimizer = tf.train.AdamOptimizer(self.config.lr)
    #optimizer = tf.train.AdagradOptimizer(self.config.lr)
    optclass = getattr(tf.train, self.config.optimizer + 'Optimizer')
    assert issubclass(optclass, tf.train.Optimizer)
    optimizer = optclass(self.config.learning_rate)
    gradient_var_pairs = optimizer.compute_gradients(loss)
    vars = [x[1] for x in gradient_var_pairs]
    gradients = [x[0] for x in gradient_var_pairs]
    if self.config.gradient_clip > 0:
        clipped, _ = tf.clip_by_global_norm(gradients, self.config.gradient_clip)
    else:
        clipped = gradients
    self.grad_norm = tf.global_norm(clipped)
    train_op = optimizer.apply_gradients(zip(clipped, vars))
    return train_op
Source: base_aligner.py, from the almond-nnparser project (author: Stanford-Mobisocial-IoT-Lab).
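The training-op builders collected on this page all follow the same compute_gradients -> clip_by_global_norm -> apply_gradients pattern. A minimal self-contained sketch of that pattern (assuming TensorFlow 1.x graph mode; the function name, learning_rate, and clip_norm defaults are illustrative, not taken from any of the projects shown here):

import tensorflow as tf

def build_train_op(loss, learning_rate=1e-3, clip_norm=5.0):
    """Sketch only: compute gradients, clip them by global norm, apply them."""
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, tvars = zip(*grads_and_vars)
    if clip_norm > 0:
        # Rescale all gradients jointly so their global L2 norm is at most clip_norm.
        grads, _ = tf.clip_by_global_norm(grads, clip_norm)
    # Track the (clipped) global norm in TensorBoard.
    tf.summary.scalar('grad_norm', tf.global_norm(grads))
    return optimizer.apply_gradients(zip(grads, tvars))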
def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.
  Note: The summaries are also added to the SUMMARIES collection.
  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).
  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                            grad_values))
      summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries
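A possible way to wire the helper above into a graph (a sketch assuming a TF 1.x session-based setup; the toy variable and loss exist only to produce gradients):

import tensorflow as tf

x = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(x))                 # toy loss, just to have gradients
optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss)
grad_summaries = _add_gradients_summaries(grads_and_vars)  # helper defined above
summary_op = tf.summary.merge(grad_summaries)      # or tf.summary.merge_all()
train_op = optimizer.apply_gradients(grads_and_vars)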
def setup_train_op(self):
    """
    Add train_op to self
    """
    with tf.variable_scope("train_step"):
        adam_optimizer = tf.train.AdamOptimizer()
        grads, vars = zip(*adam_optimizer.compute_gradients(self.loss))
        clip_val = self.config.max_gradient_norm
        # if -1 then do not perform gradient clipping
        if clip_val != -1:
            clipped_grads, _ = tf.clip_by_global_norm(grads, self.config.max_gradient_norm)
            self.global_grad = tf.global_norm(clipped_grads)
            self.gradients = zip(clipped_grads, vars)
        else:
            self.global_grad = tf.global_norm(grads)
            self.gradients = zip(grads, vars)
        self.train_op = adam_optimizer.apply_gradients(self.gradients)
    self.init = tf.global_variables_initializer()
def build_graph(self, weights, loss=None, optimizer=None, norm=False, batch_size=None, grad_ys=None):
    if loss is not None:
        gradients = tf.gradients(loss.node, list(utils.Utils.flatten(weights.node)), grad_ys)
        gradients = [tf.check_numerics(g, 'gradient_%d' % i) for i, g in enumerate(gradients)]
        if batch_size is not None:
            gradients = [g / float(batch_size) for g in gradients]
        # store gradients global norm before clipping
        self.global_norm = tf.global_norm(gradients)
        # clip gradients after global norm has been stored
        if norm:
            gradients, _ = tf.clip_by_global_norm(gradients, norm)
        self.calculate = graph.TfNode(utils.Utils.reconstruct(gradients, weights.node))
    if optimizer is not None:
        self.ph_gradients = graph.Placeholders(weights)
        self.apply = graph.TfNode(optimizer.node.apply_gradients(
            utils.Utils.izip(self.ph_gradients.checked, weights.node)))
def _numerically_stable_global_norm(tensor_list):
  """Compute the global norm of a list of Tensors, with improved stability.
  The global norm computation sometimes overflows due to the intermediate L2
  step. To avoid this, we divide by a cheap-to-compute max over the
  matrix elements.
  Args:
    tensor_list: A list of tensors, or `None`.
  Returns:
    A scalar tensor with the global norm.
  """
  if np.all([x is None for x in tensor_list]):
    return 0.0
  list_max = tf.reduce_max([tf.reduce_max(tf.abs(x)) for x in
                            tensor_list if x is not None])
  return list_max * tf.global_norm([x / list_max for x in tensor_list
                                    if x is not None])
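To see what the rescaling buys, compare against the plain norm on a value near the float32 limit: squaring inside tf.global_norm overflows to inf, while dividing by the elementwise max first keeps the intermediate values finite. An illustrative sketch, assuming TF 1.x and that numpy is imported as np for the helper above:

import numpy as np
import tensorflow as tf

big = tf.constant([3e38], dtype=tf.float32)        # close to the float32 max (~3.4e38)
naive = tf.global_norm([big])                      # squares first -> overflows to inf
stable = _numerically_stable_global_norm([big])    # divides by the max first -> 3e38
with tf.Session() as sess:
    print(sess.run([naive, stable]))               # roughly [inf, 3e38]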
def summary_gradients(grad_vars, summary_types, collections=None):
    """
    Add summary to all gradient tensors
    Args:
        grad_vars: list of (gradient, variable) pairs
        summary_types: a list of summary types to add,
            e.g. ['scalar', 'histogram', 'sparsity', 'mean', 'rms', 'stddev', 'norm', 'max', 'min']
        collections: training or validation collections
    """
    with tf.name_scope('summary/gradient'):
        for grad, var in grad_vars:
            ndims = grad.get_shape().ndims
            for s_type in summary_types:
                summary_param(s_type, grad, ndims, var.op.name +
                              '/grad', collections=None)
        try:
            tf.summary.scalar('/global_norm', tf.global_norm(
                [grad for grad, _ in grad_vars]), collections=collections)
        except Exception:
            return
def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.
  Note: The summaries are also added to the SUMMARIES collection.
  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).
  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(tf.histogram_summary(var.op.name + ':gradient',
                                            grad_values))
      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries
Source: model_deploy.py, from the Embarrassingly-Parallel-Image-Classification project (author: Azure).
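The copy above predates TensorFlow 1.0, so it uses the old summary functions. For orientation, these map onto the 1.x names as follows (essentially a rename; the first argument changed from tag to name):

# TensorFlow < 1.0                      TensorFlow >= 1.0
# tf.histogram_summary(tag, values) ->  tf.summary.histogram(name, values)
# tf.scalar_summary(tag, value)     ->  tf.summary.scalar(name, value)
# tf.merge_all_summaries()          ->  tf.summary.merge_all()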
def add_train_op(self, loss):
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    opt = tf.train.AdamOptimizer(learning_rate=self.lr)
    gradients, variables = zip(*opt.compute_gradients(loss))
    # save selected gradient summaries
    #for grad in gradients:
    #    if 'BasicDecoder' in grad.name or 'gru_cell' in grad.name or 'highway_3' in grad.name:
    #        tf.summary.scalar(grad.name, tf.reduce_sum(grad))
    # optionally cap and noise gradients to regularize
    if self.config.cap_grads > 0:
        with tf.variable_scope('cap_grads'):
            tf.summary.scalar('global_gradient_norm', tf.global_norm(gradients))
            gradients, _ = tf.clip_by_global_norm(gradients, self.config.cap_grads)
    train_op = opt.apply_gradients(zip(gradients, variables), global_step=self.global_step)
    return train_op
def _build_gradient(self, target):
    """
    Local gradient for remote vars
    """
    local_grad = tf.gradients(self.total_loss, self.get_trainable_weights())
    self.for_summary_scalar += [tf.global_norm(local_grad, name='grad_norm'),
                                tf.global_norm(self.get_trainable_weights(), name='vars_norm')]
    # clip grad by norm
    local_grad, _ = tf.clip_by_global_norm(local_grad, self.clip_grad_norm)
    # mix with remote vars
    remote_vars = target.get_trainable_weights()
    assert len(local_grad) == len(remote_vars)
    vars_and_grads = list(zip(local_grad, remote_vars))
    # each worker has a different set of adam optimizer parameters
    optimizer = tf.train.AdamOptimizer(self.lr)
    # apply
    apply_grad = optimizer.apply_gradients(vars_and_grads)
    inc_step = self.global_step.assign_add(tf.shape(self.x)[0])
    self.train_op = tf.group(apply_grad, inc_step)
def gradient_summaries(gvs, norm=True, ratio=True, histogram=True):
    """Register gradient summaries.
    Logs the global norm of the gradient, ratios of gradient_norm/variable_norm and
    histograms of gradients.
    :param gvs: list of (gradient, variable) tuples
    :param norm: boolean, logs norm of the gradient if True
    :param ratio: boolean, logs ratios if True
    :param histogram: boolean, logs gradient histograms if True
    """
    with tf.name_scope('grad_summary'):
        if norm:
            grad_norm = tf.global_norm([gv[0] for gv in gvs])
            tf.summary.scalar('grad_norm', grad_norm)
        for g, v in gvs:
            var_name = v.name.split(':')[0]
            if g is None:
                print('Gradient for variable {} is None'.format(var_name))
                continue
            if ratio:
                log_ratio((g, v), '/'.join(('grad_ratio', var_name)))
            if histogram:
                tf.summary.histogram('/'.join(('grad_hist', var_name)), g)
def clip_gradient(pair_list, max_norm):
    """Perform gradient clipping.
    If the gradients' global norm exceeds `max_norm`, rescale all gradients so that
    their global norm equals `max_norm`.
    :param pair_list: (grad, var) pair list.
    :param max_norm: The max global norm.
    :return: (grad, var) pair list, the original gradients' norm, the clipped gradients' norm
    """
    grad_list = [grad for grad, _ in pair_list]
    grad_list, raw_grad = tf.clip_by_global_norm(grad_list, max_norm)
    grad = tf.global_norm(grad_list)
    pair_list = [(g, pair[1]) for g, pair in zip(grad_list, pair_list)]
    return pair_list, raw_grad, grad
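A small usage sketch for clip_gradient (assumptions: TF 1.x and a toy two-element variable so the numbers are easy to check; the gradient of sum(w**2) is 2*w = [6, 8], whose norm is 10, clipped down to 1):

import tensorflow as tf

w = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(w))
opt = tf.train.GradientDescentOptimizer(0.1)
pairs = opt.compute_gradients(loss)
clipped_pairs, norm_before, norm_after = clip_gradient(pairs, max_norm=1.0)
train_op = opt.apply_gradients(clipped_pairs)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([norm_before, norm_after]))     # roughly [10.0, 1.0]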
def _backward(self, loss, summaries=False):
    hps = self.hps
    loss = loss * hps.num_steps
    emb_vars = find_trainable_variables("emb")
    lstm_vars = find_trainable_variables("LSTM")
    softmax_vars = find_trainable_variables("softmax")
    all_vars = emb_vars + lstm_vars + softmax_vars
    grads = tf.gradients(loss, all_vars)
    orig_grads = grads[:]
    emb_grads = grads[:len(emb_vars)]
    grads = grads[len(emb_vars):]
    for i in range(len(emb_grads)):
        assert isinstance(emb_grads[i], tf.IndexedSlices)
        emb_grads[i] = tf.IndexedSlices(emb_grads[i].values * hps.batch_size, emb_grads[i].indices,
                                        emb_grads[i].dense_shape)
    lstm_grads = grads[:len(lstm_vars)]
    softmax_grads = grads[len(lstm_vars):]
    lstm_grads, lstm_norm = tf.clip_by_global_norm(lstm_grads, hps.max_grad_norm)
    clipped_grads = emb_grads + lstm_grads + softmax_grads
    assert len(clipped_grads) == len(orig_grads)
    if summaries:
        tf.scalar_summary("model/lstm_grad_norm", lstm_norm)
        tf.scalar_summary("model/lstm_grad_scale", tf.minimum(hps.max_grad_norm / lstm_norm, 1.0))
        tf.scalar_summary("model/lstm_weight_norm", tf.global_norm(lstm_vars))
    # for v, g, cg in zip(all_vars, orig_grads, clipped_grads):
    #     name = v.name.lstrip("model/")
    #     tf.histogram_summary(name + "/var", v)
    #     tf.histogram_summary(name + "/grad", g)
    #     tf.histogram_summary(name + "/clipped_grad", cg)
    return list(zip(clipped_grads, all_vars))
def build_graph(self, *layers):
    weights = [layer.weight.node for layer in layers]
    self.ph_weights = graph.Placeholders(variables=graph.TfNode(weights))
    self.assign = graph.TfNode([tf.assign(variable, value) for variable, value in
                                utils.Utils.izip(weights, self.ph_weights.checked)])
    self.check = graph.TfNode(tf.group(*[tf.check_numerics(w, 'weight_%d' % i) for i, w in
                                         enumerate(utils.Utils.flatten(weights))]))
    self.global_norm = tf.global_norm(list(utils.Utils.flatten(weights)))
    return weights