def get_cubic_root(self):
# We minimize x^2 D^2 + (1-x)^4 * C / h_min^2 over x, where x = sqrt(mu).
# Setting the derivative to zero and substituting x = y + 1 gives the
# depressed cubic y^3 + p*y + p = 0 (i.e. y^3 + p*y = q with q = -p),
# where p = (D^2 h_min^2) / (2*C).
# We use Vieta's substitution to compute the root.
# There is only one real solution y, and x = y + 1 lies in [0, 1].
# http://mathworld.wolfram.com/VietasSubstitution.html
# assert_array = \
# [tf.Assert(tf.logical_not(tf.is_nan(self._dist_to_opt_avg) ), [self._dist_to_opt_avg,]),
# tf.Assert(tf.logical_not(tf.is_nan(self._h_min) ), [self._h_min,]),
# tf.Assert(tf.logical_not(tf.is_nan(self._grad_var) ), [self._grad_var,]),
# tf.Assert(tf.logical_not(tf.is_inf(self._dist_to_opt_avg) ), [self._dist_to_opt_avg,]),
# tf.Assert(tf.logical_not(tf.is_inf(self._h_min) ), [self._h_min,]),
# tf.Assert(tf.logical_not(tf.is_inf(self._grad_var) ), [self._grad_var,])]
# with tf.control_dependencies(assert_array):
# EPS in the numerator prevents the momentum from being exactly one when the gradient is zero.
p = (self._dist_to_opt_avg + EPS)**2 * (self._h_min + EPS)**2 / 2 / (self._grad_var + EPS)
w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0)
y = w - p / 3.0 / (w + EPS)
x = y + 1
return x
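As a quick sanity check of the closed-form root above, the same depressed cubic y^3 + p*y + p = 0 can be solved with NumPy and compared against the Vieta's-substitution formula. This is only a sketch: the values chosen below for D, h_min and C are made up, and the EPS terms are omitted.
import numpy as np

# Hypothetical stand-ins for dist_to_opt_avg, h_min and grad_var.
D, h_min, C = 0.5, 2.0, 1.3
p = (D ** 2) * (h_min ** 2) / (2.0 * C)

# Vieta's substitution, mirroring the TensorFlow graph above.
w3 = (-np.sqrt(p ** 2 + 4.0 / 27.0 * p ** 3) - p) / 2.0
w = np.sign(w3) * np.abs(w3) ** (1.0 / 3.0)
y = w - p / (3.0 * w)
x = y + 1.0  # x = sqrt(mu), expected to land in [0, 1]

# Cross-check against the real root of y^3 + p*y + p = 0.
roots = np.roots([1.0, 0.0, p, p])
real_root = min(roots, key=lambda r: abs(r.imag)).real
assert np.isclose(y, real_root)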
Python is_nan() example source code
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
"""Filters out groundtruth with no bounding boxes.
Args:
tensor_dict: a dictionary of the following groundtruth tensors:
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
Returns:
a dictionary of tensors containing only the groundtruth entries whose
bounding boxes are free of NaN coordinates.
"""
groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
valid_indicator_vector = tf.logical_not(nan_indicator_vector)
valid_indices = tf.where(valid_indicator_vector)
return retain_groundtruth(tensor_dict, valid_indices)
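The same NaN-row filtering can be sketched without the `fields` constants or the `retain_groundtruth` helper, using `tf.boolean_mask` directly. The tensors below are illustrative, not part of the original module.
import numpy as np
import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [np.nan, 0.2, 0.6, 0.8],  # row with a NaN coordinate
                     [0.3, 0.3, 0.9, 0.9]])
classes = tf.constant([1, 2, 3])

# A box is invalid if any of its four coordinates is NaN.
has_nan = tf.reduce_any(tf.is_nan(boxes), axis=1)
valid = tf.logical_not(has_nan)

filtered_boxes = tf.boolean_mask(boxes, valid)
filtered_classes = tf.boolean_mask(classes, valid)

with tf.Session() as sess:
    print(sess.run([filtered_boxes, filtered_classes]))  # rows 0 and 2 only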
def _cosine_distance(M, k):
# this is equation (6), or as I like to call it: The NaN factory.
# TODO: Find it in a library (keras cosine loss?)
# normalizing first as it is better conditioned.
nk = K.l2_normalize(k, axis=-1)
nM = K.l2_normalize(M, axis=-1)
cosine_distance = K.batch_dot(nM, nk)
# TODO: Do successful error handling
#cosine_distance_error_handling = tf.Print(cosine_distance, [cosine_distance], message="NaN occurred in _cosine_distance")
#cosine_distance_error_handling = K.ones(cosine_distance_error_handling.shape)
#cosine_distance = tf.case({K.any(tf.is_nan(cosine_distance)) : (lambda: cosine_distance_error_handling)},
# default = lambda: cosine_distance, strict=True)
return cosine_distance
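One possible guard, in the spirit of the commented-out error handling above and the `_nan2zero` helper below, is simply to zero out any NaN entries of the similarity. This is only a sketch, not the original function; it assumes `K` is `keras.backend` as in `_cosine_distance`.
import tensorflow as tf
from keras import backend as K

def _cosine_distance_nan_guarded(M, k):
    # Same computation as _cosine_distance, with NaN entries replaced by zeros.
    nk = K.l2_normalize(k, axis=-1)
    nM = K.l2_normalize(M, axis=-1)
    cosine_distance = K.batch_dot(nM, nk)
    return tf.where(tf.is_nan(cosine_distance),
                    tf.zeros_like(cosine_distance),
                    cosine_distance)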
def _nan2zero(x):
return tf.where(tf.is_nan(x), tf.zeros_like(x), x)
def _nan2inf(x):
return tf.where(tf.is_nan(x), tf.zeros_like(x)+np.inf, x)
def _nelem(x):
nelem = tf.reduce_sum(tf.cast(~tf.is_nan(x), tf.float32))
return tf.cast(tf.where(tf.equal(nelem, 0.), 1., nelem), x.dtype)
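Taken together, these helpers support NaN-aware reductions. A minimal sketch, assuming the three functions above are in scope, is a mean over only the non-NaN elements:
import numpy as np
import tensorflow as tf

def _nan_mean(x):
    # Sum with NaNs treated as zero, divided by the number of non-NaN elements.
    return tf.reduce_sum(_nan2zero(x)) / _nelem(x)

x = tf.constant([1.0, np.nan, 3.0])
with tf.Session() as sess:
    print(sess.run(_nan_mean(x)))  # 2.0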
def _add_loss_summaries(self, total_loss):
"""Add summaries for losses in ip5wke model.
Generates moving average for all losses and associated summaries for
visualizing the performance of the network.
Args:
total_loss: Total loss from loss().
Returns:
loss_averages_op: op for generating moving averages of losses.
"""
# Compute the moving average of all individual losses and the total
# loss
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss])
accuracies = tf.get_collection('accuracies')
#for a in accuracies:
#tf.summary.scalar('accuracy', a)
# Attach a scalar summary to all individual losses and the total loss;
# do the same for the averaged version of the losses.
for l in losses + [total_loss]:
#Name each loss as '(raw)' and name the moving average version of
#the loss as the original loss name.
tf.summary.scalar(l.op.name + ' (raw)',
tf.where(tf.is_nan(l), 0.0, l))
tf.summary.scalar(l.op.name, loss_averages.average(l))
return loss_averages_op
def _add_loss_summaries(self, total_loss):
"""Add summaries for losses in ip5wke model.
Generates moving average for all losses and associated summaries for
visualizing the performance of the network.
Args:
total_loss: Total loss from loss().
Returns:
loss_averages_op: op for generating moving averages of losses.
"""
# Compute the moving average of all individual losses and the total
# loss
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss])
accuracies = tf.get_collection('accuracies')
for a in accuracies:
tf.summary.scalar('accuracy', a)
# Attach a scalar summary to all individual losses and the total loss;
# do the same for the averaged version of the losses.
for l in losses + [total_loss]:
# Name each loss as '(raw)' and name the moving average version of
# the loss as the original loss name.
tf.summary.scalar(l.op.name + ' (raw)',
tf.where(tf.is_nan(l), 0.0, l))
tf.summary.scalar(l.op.name, loss_averages.average(l))
return loss_averages_op
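Outside the ip5wke classes, the same pattern can be exercised on its own: an exponential moving average over a 'losses' collection plus a NaN-guarded raw summary per loss. The loss tensors below are made up for illustration.
import tensorflow as tf

loss_a = tf.reduce_mean(tf.square(tf.random_normal([8])), name='loss_a')
loss_b = tf.reduce_mean(tf.abs(tf.random_normal([8])), name='loss_b')
total_loss = tf.add_n([loss_a, loss_b], name='total_loss')
for l in [loss_a, loss_b]:
    tf.add_to_collection('losses', l)

loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss])

for l in losses + [total_loss]:
    # Report the raw value with NaN mapped to 0 so TensorBoard plots stay finite.
    tf.summary.scalar(l.op.name + ' (raw)', tf.where(tf.is_nan(l), 0.0, l))
    tf.summary.scalar(l.op.name, loss_averages.average(l))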
def assert_no_nan(tensor):
return tf.assert_equal(tf.reduce_any(tf.is_nan(tensor)), False)
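A usage sketch for `assert_no_nan`: the assertion only runs when it is wired into the graph, for example through `tf.control_dependencies`. The `logits` tensor here is illustrative.
import tensorflow as tf

logits = tf.random_normal([4, 10])
check = assert_no_nan(logits)
with tf.control_dependencies([check]):
    # The identity forces the assert to execute before logits are consumed.
    safe_logits = tf.identity(logits)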
def train(self, loss, global_step):
num_batches_per_epoch = self.num_examples_per_epoch
decay_steps = int(num_batches_per_epoch * self.num_epochs_per_decay)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(self.initial_learning_rate,
global_step,
decay_steps,
self.learning_rate_decay_factor,
staircase=True)
tf.summary.scalar('learning_rate', lr)
# Generate moving averages of all losses and associated summaries.
loss_averages_op = self._add_loss_summaries(loss)
# Compute gradients.
with tf.variable_scope('calculate_gradients'):
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr, epsilon=self.adam_epsilon)
grads = opt.compute_gradients(loss)
# grads = [
# (tf.clip_by_value(tf.where(tf.is_nan(grad), tf.zeros_like(grad),
# grad), -1000.0, 1000.0), var) if grad is not None else
# (tf.zeros_like(var), var) for grad, var in grads]
# Apply gradients.
# grad_check = tf.check_numerics(grads, "NaN or Inf gradients found: ")
# with tf.control_dependencies([grad_check]):
apply_gradient_op = opt.apply_gradients(grads,
global_step=global_step)
# Add histograms for trainable variables.
# for var in tf.trainable_variables():
# #tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
# for grad, var in grads:
# if grad is not None:
# #tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
# variable_averages = tf.train.ExponentialMovingAverage(
# self.moving_average_decay, global_step)
# variables_averages_op = variable_averages.apply(
# tf.trainable_variables())
with tf.control_dependencies(
[apply_gradient_op]):#, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
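For reference, the commented-out gradient guard above would, if enabled, zero out NaN gradients and clip the rest by value; written out (as an illustration of that commented block only, not of what this train() currently runs) it looks like:
grads = [
    (tf.clip_by_value(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad),
                      -1000.0, 1000.0), var)
    if grad is not None else (tf.zeros_like(var), var)
    for grad, var in grads]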
def train(self, loss, global_step):
num_batches_per_epoch = self.num_examples_per_epoch / self.batch_size
decay_steps = int(num_batches_per_epoch * self.num_epochs_per_decay)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(self.initial_learning_rate,
global_step,
decay_steps,
self.learning_rate_decay_factor,
staircase=True)
tf.summary.scalar('learning_rate', lr)
# Generate moving averages of all losses and associated summaries.
loss_averages_op = self._add_loss_summaries(loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr, epsilon=self.adam_epsilon)
grads = opt.compute_gradients(loss)
# Replace NaN gradients with zeros, then clip each gradient to norm 5.
grads = [(tf.clip_by_norm(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), 5.0), var)
         for grad, var in grads]
# Apply gradients.
# grad_check = tf.check_numerics(grads, "NaN or Inf gradients found: ")
# with tf.control_dependencies([grad_check]):
apply_gradient_op = opt.apply_gradients(grads,
global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
self.moving_average_decay, global_step)
variables_averages_op = variable_averages.apply(
tf.trainable_variables())
with tf.control_dependencies(
[apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def __init__(self, batch_size, vocab_size, encoding_size, embedding_size,
num_glimpses = 8,
grad_norm_clip = 5.,
l2_reg_coef=1e-4,
session=tf.Session(),
name='AlternatingAttention'):
"""
Creates an iterative alternating attention network as described in https://arxiv.org/abs/1606.02245
"""
self._batch_size = batch_size
self._vocab_size = vocab_size
self._encode_size = encoding_size
self._infer_size = 4 * encoding_size
self._embedding_size = embedding_size
self._num_glimpses = num_glimpses
self._sess = session
self._name = name
self._build_placeholders()
self._build_variables()
# Regularization
tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(l2_reg_coef), [self._embeddings])
# Answer probability
doc_attentions = self._inference(self._docs, self._queries)
nans = tf.reduce_sum(tf.to_float(tf.is_nan(doc_attentions)))
self._doc_attentions = doc_attentions
ans_mask = tf.to_float(tf.equal(tf.expand_dims(self._answers, -1), self._docs))
P_a = tf.reduce_sum(ans_mask * doc_attentions, 1)
loss_op = -tf.reduce_mean(tf.log(P_a + tf.constant(0.00001)))
self._loss_op = loss_op
# Optimizer and gradients
with tf.name_scope("optimizer"):
self._opt = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
grads_and_vars = self._opt.compute_gradients(loss_op)
capped_grads_and_vars = [(tf.clip_by_norm(g, grad_norm_clip), v) for g,v in grads_and_vars]
self._train_op = self._opt.apply_gradients(capped_grads_and_vars, global_step=self._global_step)
tf.summary.scalar('loss', self._loss_op)
tf.summary.scalar('learning_rate', self._learning_rate)
tf.summary.histogram('answer_probability', P_a)
self._summary_op = tf.summary.merge_all()
self._sess.run(tf.global_variables_initializer())
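The `nans` tensor above counts NaN entries in the attention but is never consumed. A hypothetical way to surface it, not part of the original model, is to export it as a summary next to the loss:
# Hypothetical addition inside __init__, right after doc_attentions is computed:
nan_count = tf.reduce_sum(tf.to_float(tf.is_nan(doc_attentions)))
tf.summary.scalar('doc_attention_nan_count', nan_count)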
def retain_boxes_above_threshold(
boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
"""Retains boxes whose label score is above a given threshold.
If the label score for a box is missing (represented by NaN), the box is
retained. The boxes that don't pass the threshold will not appear in the
returned tensor.
Args:
boxes: float32 tensor of shape [num_instance, 4] representing boxes
location in normalized coordinates.
labels: rank 1 int32 tensor of shape [num_instance] containing the object
classes.
label_scores: float32 tensor of shape [num_instance] representing the
score for each box.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks are of
the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
threshold: scalar python float.
Returns:
retained_boxes: [num_retained_instance, 4]
retained_labels: [num_retained_instance]
retained_label_scores: [num_retained_instance]
If masks, or keypoints are not None, the function also returns:
retained_masks: [num_retained_instance, height, width]
retained_keypoints: [num_retained_instance, num_keypoints, 2]
"""
with tf.name_scope('RetainBoxesAboveThreshold',
values=[boxes, labels, label_scores]):
indices = tf.where(
tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
indices = tf.squeeze(indices, axis=1)
retained_boxes = tf.gather(boxes, indices)
retained_labels = tf.gather(labels, indices)
retained_label_scores = tf.gather(label_scores, indices)
result = [retained_boxes, retained_labels, retained_label_scores]
if masks is not None:
retained_masks = tf.gather(masks, indices)
result.append(retained_masks)
if keypoints is not None:
retained_keypoints = tf.gather(keypoints, indices)
result.append(retained_keypoints)
return result
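A small usage sketch with made-up values, showing that a NaN score is retained while a below-threshold score is dropped:
import numpy as np
import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                     [0.1, 0.1, 0.9, 0.9],
                     [0.2, 0.2, 0.4, 0.4]])
labels = tf.constant([1, 2, 3], dtype=tf.int32)
scores = tf.constant([0.9, np.nan, 0.05])  # the NaN score is kept by design

retained = retain_boxes_above_threshold(boxes, labels, scores, threshold=0.5)
with tf.Session() as sess:
    retained_boxes, retained_labels, retained_scores = sess.run(retained)
    # Rows 0 (score 0.9) and 1 (NaN score) are retained; row 2 is dropped.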