def masked_apply(tensor, op, mask):
"""Applies `op` to tensor only at locations indicated by `mask` and sets the rest to zero.
Similar to doing `tensor = tf.where(mask, op(tensor), tf.zeros_like(tensor))` but it behaves correctly
when `op(tensor)` is NaN or inf while tf.where does not.
:param tensor: tf.Tensor
:param op: tf.Op
:param mask: tf.Tensor with dtype == bool
:return: tf.Tensor
"""
chosen = tf.boolean_mask(tensor, mask)
applied = op(chosen)
idx = tf.to_int32(tf.where(mask))
result = tf.scatter_nd(idx, applied, tf.shape(tensor))
return result
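# A minimal usage sketch, assuming TF 1.x: with plain tf.where, `op` is evaluated
# at every position, so log(0) would inject -inf (and NaN gradients); masked_apply
# only evaluates `op` on the selected elements.
x = tf.constant([[1.0, 0.0], [0.0, 4.0]])
mask = x > 0
y = masked_apply(x, tf.log, mask)
with tf.Session() as sess:
    print(sess.run(y))  # [[0. 0.] [0. 1.3862944]]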
def cross_entropy_sequence_loss(logits, targets, sequence_length):
"""Calculates the per-example cross-entropy loss for a sequence of logits and
masks out all losses passed the sequence length.
Args:
logits: Logits of shape `[T, B, vocab_size]`
targets: Target classes of shape `[T, B]`
sequence_length: An int32 tensor of shape `[B]` corresponding
to the length of each input
Returns:
A tensor of shape [T, B] that contains the loss per example, per time step.
"""
with tf.name_scope("cross_entropy_sequence_loss"):
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=targets)
# Mask out the losses we don't care about
loss_mask = tf.sequence_mask(
tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])
return losses
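# A shape-level sketch of a typical call, with hypothetical sizes
# (T=3 time steps, B=2 examples, vocab of 5):
logits = tf.random_normal([3, 2, 5])        # [T, B, vocab_size]
targets = tf.zeros([3, 2], dtype=tf.int32)  # [T, B]
lengths = tf.constant([3, 1])               # [B]
losses = cross_entropy_sequence_loss(logits, targets, lengths)
# losses has shape [3, 2]; steps past each length (here losses[1, 1] and
# losses[2, 1]) are zeroed by the mask.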
def mu_law_encode_nonlinear(audio, quantization_channels=256):
'''
Compress the waveform amplitudes using mu-law non-linearity.
    NOTE: This applies the mu-law non-linearity only; unlike full mu-law
    encoding, it does not quantize the result.
'''
with tf.name_scope('encode'):
mu = tf.to_float(quantization_channels - 1)
# Perform mu-law companding transformation (ITU-T, 1988).
# Minimum operation is here to deal with rare large amplitudes caused
# by resampling.
safe_audio_abs = tf.minimum(tf.abs(audio), 1.0)
magnitude = tf.log1p(mu * safe_audio_abs) / tf.log1p(mu)
signal = tf.multiply(tf.sign(audio), magnitude, name='mulaw')
# Quantize signal to the specified number of levels.
# return tf.to_int32((signal + 1) / 2 * mu + 0.5)
return signal
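# The commented-out return above is the quantization step this function omits.
# A sketch of the full round trip, assuming the standard mu-law expansion as
# the inverse of the companding:
audio = tf.constant([-0.5, 0.0, 0.25])
signal = mu_law_encode_nonlinear(audio)                # companded, in [-1, 1]
quantized = tf.to_int32((signal + 1) / 2 * 255 + 0.5)  # the omitted 256-level step
mu = 255.0
recovered = tf.sign(signal) * ((1.0 + mu) ** tf.abs(signal) - 1.0) / mu  # inverse companding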
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bboxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
# Won't be back-propagated to rois anyway, but to save time
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))
if cfg.RESNET.MAX_POOL:
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size],
name="crops")
crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
else:
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [cfg.POOLING_SIZE, cfg.POOLING_SIZE],
name="crops")
return crops
# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes 0, sometimes 1
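# For reference: tf.image.crop_and_resize expects boxes in normalized
# [y1, x1, y2, x2] order, which is why the concat above puts y before x.
# A standalone sketch on a hypothetical 8x8 feature map:
feat = tf.reshape(tf.range(64, dtype=tf.float32), [1, 8, 8, 1])
boxes = tf.constant([[0.0, 0.0, 0.5, 0.5]])  # normalized [y1, x1, y2, x2]
box_ind = tf.constant([0], dtype=tf.int32)   # batch index for each box
patch = tf.image.crop_and_resize(feat, boxes, box_ind, crop_size=[4, 4])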
def _anchor_target_layer(self, rpn_cls_score, name):
with tf.variable_scope(name) as scope:
rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = tf.py_func(
anchor_target_layer,
[rpn_cls_score, self._gt_boxes, self._im_info, self._feat_stride, self._anchors, self._num_anchors],
[tf.float32, tf.float32, tf.float32, tf.float32],
name="anchor_target")
rpn_labels.set_shape([1, 1, None, None])
rpn_bbox_targets.set_shape([1, None, None, self._num_anchors * 4])
rpn_bbox_inside_weights.set_shape([1, None, None, self._num_anchors * 4])
rpn_bbox_outside_weights.set_shape([1, None, None, self._num_anchors * 4])
rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
self._anchor_targets['rpn_labels'] = rpn_labels
self._anchor_targets['rpn_bbox_targets'] = rpn_bbox_targets
self._anchor_targets['rpn_bbox_inside_weights'] = rpn_bbox_inside_weights
self._anchor_targets['rpn_bbox_outside_weights'] = rpn_bbox_outside_weights
self._score_summaries.update(self._anchor_targets)
return rpn_labels
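# The set_shape calls above are needed because tf.py_func returns tensors with
# fully unknown static shapes. A minimal sketch of the same pattern around a
# hypothetical NumPy function:
import numpy as np

def _np_double(x):
    return (x * 2.0).astype(np.float32)

inp = tf.placeholder(tf.float32, [None, 4])
out = tf.py_func(_np_double, [inp], tf.float32, name="double")
out.set_shape([None, 4])  # restore the static shape information py_func discards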
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bounding boxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
# Won't be back-propagated to rois anyway, but to save time
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")
return slim.max_pool2d(crops, [2, 2], padding='SAME')
def _proposal_target_layer(self, rois, roi_scores, name):
with tf.variable_scope(name) as scope:
rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = tf.py_func(
proposal_target_layer,
[rois, roi_scores, self._gt_boxes, self._num_classes],
[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
name="proposal_target")
rois.set_shape([cfg.TRAIN.BATCH_SIZE, 5])
roi_scores.set_shape([cfg.TRAIN.BATCH_SIZE])
labels.set_shape([cfg.TRAIN.BATCH_SIZE, 1])
bbox_targets.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
bbox_inside_weights.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
bbox_outside_weights.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
self._proposal_targets['rois'] = rois
self._proposal_targets['labels'] = tf.to_int32(labels, name="to_int32")
self._proposal_targets['bbox_targets'] = bbox_targets
self._proposal_targets['bbox_inside_weights'] = bbox_inside_weights
self._proposal_targets['bbox_outside_weights'] = bbox_outside_weights
self._score_summaries.update(self._proposal_targets)
return rois, roi_scores
def _anchor_component(self):
with tf.variable_scope('ANCHOR_' + self._tag) as scope:
# just to get the shape right
height = tf.to_int32(tf.ceil(self._im_info[0] / np.float32(self._feat_stride[0])))
width = tf.to_int32(tf.ceil(self._im_info[1] / np.float32(self._feat_stride[0])))
anchors, anchor_length = tf.py_func(generate_anchors_pre,
[height, width,
self._feat_stride, self._anchor_scales, self._anchor_ratios],
[tf.float32, tf.int32], name="generate_anchors")
anchors.set_shape([None, 4])
anchor_length.set_shape([])
self._anchors = anchors
self._anchor_length = anchor_length
# [Hand Detection] Batch normalization
# http://stackoverflow.com/a/34634291/2267819
# Note that this is different from the paper (they use another method).
def _parse_example(self, serialized):
"""Unpack a serialized example to Tensor."""
feats = self._get_data_features()
sz_feats = self._get_sz_features()
for s in sz_feats:
feats[s] = sz_feats[s]
sample = tf.parse_single_example(serialized, features=feats)
data = {}
for i, f in enumerate(self.FEATURES):
s = tf.to_int32(sample[f+'_sz'])
data[f] = tf.decode_raw(sample[f], self.dtypes[f], name='decode_{}'.format(f))
data[f] = tf.reshape(data[f], s)
return data
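# A self-contained sketch of the round trip this parser relies on, with one
# hypothetical feature 'img' stored as raw float32 bytes alongside its shape:
serialized = tf.placeholder(tf.string, [])
feats = {
    'img': tf.FixedLenFeature([], tf.string),
    'img_sz': tf.FixedLenFeature([3], tf.int64),
}
sample = tf.parse_single_example(serialized, features=feats)
img = tf.decode_raw(sample['img'], tf.float32)
img = tf.reshape(img, tf.to_int32(sample['img_sz']))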
def _decode_lambda(self, args):
"""
Decoding within tensorflow graph.
In case kenlm_directory is specified, a modified version of tensorflow
(available at https://github.com/timediv/tensorflow-with-kenlm)
is needed to run that extends ctc_decode to use a kenlm decoder.
:return:
Most probable decoded sequence. Important: blank labels are returned as `-1`.
"""
import tensorflow as tf
prediction_batch, prediction_lengths = args
log_prediction_batch = tf.log(tf.transpose(prediction_batch, perm=[1, 0, 2]) + 1e-8)
prediction_length_batch = tf.to_int32(tf.squeeze(prediction_lengths, axis=[1]))
(decoded, log_prob) = self.ctc_get_decoded_and_log_probability_batch(log_prediction_batch,
prediction_length_batch)
return single([tf.sparse_to_dense(st.indices, st.dense_shape, st.values, default_value=-1) for st in decoded])
def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.
    scale : The image will first be scaled to this size.
            If isotropic is true, the smaller side is rescaled to this,
            preserving the aspect ratio.
    crop  : After scaling, a central crop of this size is taken.
    mean  : Subtracted from the image.
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.stack([scale, scale])
    img = tf.image.resize_images(img, new_shape)
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    offset = (new_shape - crop) // 2  # integer division keeps the offsets int32
    img = tf.slice(img, begin=tf.stack([offset[0], offset[1], 0]), size=tf.stack([crop, crop, -1]))
    # Mean subtraction
    return tf.to_float(img) - mean
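# A usage sketch with hypothetical values: rescale so the smaller side is 256,
# take a 224 center crop, and subtract an assumed per-channel mean.
raw = tf.placeholder(tf.uint8, [None, None, 3])
mean = tf.constant([104.0, 117.0, 124.0])  # hypothetical per-channel mean
processed = process_image(tf.to_float(raw), scale=256, isotropic=True, crop=224, mean=mean)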
def _largest_size_at_most(height, width, largest_side):
"""Computes new shape with the largest side equal to `largest_side`.
Computes new shape with the largest side equal to `largest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
largest_side: A python integer or scalar `Tensor` indicating the size of
the largest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
    new_width: an int32 scalar tensor indicating the new width.
"""
largest_side = tf.convert_to_tensor(largest_side, dtype=tf.int32)
height = tf.to_float(height)
width = tf.to_float(width)
largest_side = tf.to_float(largest_side)
scale = tf.cond(tf.greater(height, width),
lambda: largest_side / height,
lambda: largest_side / width)
new_height = tf.to_int32(height * scale)
new_width = tf.to_int32(width * scale)
return new_height, new_width
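# Typical usage, assuming a resize to at most 512 pixels on the longest side:
img = tf.placeholder(tf.float32, [None, None, 3])
shape = tf.shape(img)
new_h, new_w = _largest_size_at_most(shape[0], shape[1], 512)
resized = tf.image.resize_images(img, [new_h, new_w])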
def zoomout(image, gt_bboxes, params):
X_out = tf.random_uniform([], 1.05, params['X_out'])
h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
zoomout_color = params['zoomout_color']+[0]
bg_color = tf.constant(zoomout_color, dtype=tf.float32)
x_shift = tf.random_uniform([], 0, (X_out - 1) * w)
y_shift = tf.random_uniform([], 0, (X_out - 1) * h)
x2_shift = (X_out - 1) * w - x_shift
y2_shift = (X_out - 1) * h - y_shift
    # Somewhat hacky way to pad with the mean color: unlike numpy, tf.pad here
    # does not support a custom constant fill value, so subtract the color,
    # zero-pad, and add it back.
image -= bg_color
image = tf.pad(image, tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]]))
image += bg_color
gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
gt_bboxes = tf.stack([gt_x + x_shift/w,
gt_y + y_shift/h,
gt_w, gt_h], axis=1)/X_out
return image, gt_bboxes
def depthCELoss2(pred, gt, weight, ss, outputChannels=16):
with tf.name_scope("depth_CE_loss"):
pred = tf.reshape(pred, (-1, outputChannels))
epsilon = tf.constant(value=1e-25)
predSoftmax = tf.to_float(tf.nn.softmax(pred))
gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32)
ss = tf.to_float(tf.reshape(ss, (-1, 1)))
weight = tf.to_float(tf.reshape(weight, (-1, 1)))
crossEntropyScaling = tf.to_float([3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
        crossEntropy = -tf.reduce_sum(((1 - gt) * tf.log(tf.maximum(1 - predSoftmax, epsilon))
                                       + gt * tf.log(tf.maximum(predSoftmax, epsilon)))
                                      * ss * crossEntropyScaling * weight,
                                      axis=[1])
crossEntropySum = tf.reduce_sum(crossEntropy, name="cross_entropy_sum")
return crossEntropySum
def _deepfool2(model, x, epochs, eta, clip_min, clip_max, min_prob):
y0 = tf.stop_gradient(tf.reshape(model(x), [-1])[0])
y0 = tf.to_int32(tf.greater(y0, 0.5))
def _cond(i, z):
xadv = tf.clip_by_value(x + z*(1+eta), clip_min, clip_max)
y = tf.stop_gradient(tf.reshape(model(xadv), [-1])[0])
y = tf.to_int32(tf.greater(y, 0.5))
return tf.logical_and(tf.less(i, epochs), tf.equal(y0, y))
def _body(i, z):
xadv = tf.clip_by_value(x + z*(1+eta), clip_min, clip_max)
y = tf.reshape(model(xadv), [-1])[0]
g = tf.gradients(y, xadv)[0]
dx = - y * g / tf.norm(g)
return i+1, z+dx
_, noise = tf.while_loop(_cond, _body, [0, tf.zeros_like(x)],
name='_deepfool2_impl', back_prop=False)
return noise
def huber_loss(infer, label, epsilon, layer_name):
    """
    Args:
        infer: predicted values
        label: ground-truth values, same shape as `infer`
        epsilon: threshold between the quadratic and the linear regime
        layer_name: name for the variable scope
    """
    with tf.variable_scope(layer_name):
        abs_diff = tf.abs(tf.subtract(infer, label))
        # partition 0 collects |diff| > epsilon (linear part),
        # partition 1 collects |diff| <= epsilon (quadratic part)
        index = tf.to_int32(abs_diff <= epsilon, name='partition_index')
        l1_part, l2_part = tf.dynamic_partition(abs_diff, index, 2)
        l1_part_loss = epsilon * (l1_part - 0.5 * epsilon)
        l2_part_loss = 0.5 * tf.square(l2_part)
        hloss = tf.reduce_mean(tf.concat([l1_part_loss, l2_part_loss], 0),
                               name='huber_loss_sum')
        return hloss
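# For comparison: recent TF 1.x releases ship a built-in Huber loss with the
# same piecewise definition, `delta` playing the role of `epsilon` above.
# A sketch, assuming flat tensors; the default reduction averages over
# elements, matching the reduce_mean above.
infer = tf.placeholder(tf.float32, [None])
label = tf.placeholder(tf.float32, [None])
builtin = tf.losses.huber_loss(labels=label, predictions=infer, delta=1.0)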
def ternary_decoder(encoded_data, scaler, shape):
    """Decode the packed sign bytes back to float format."""
    a = tf.cast(encoded_data, tf.int32)
    # each byte packs four base-4 digits; floor division unpacks them
    a_split1 = tf.mod(a, 4)
    a_split2 = tf.mod(a // 4, 4)
    a_split3 = tf.mod(a // 16, 4)
    a_split4 = tf.mod(a // 64, 4)
    a = tf.concat([a_split1, a_split2, a_split3, a_split4], 0)
real_size = tf.reduce_prod(shape)
a = tf.to_float(a)
a = tf.gather(a, tf.range(0,real_size))
a = tf.reshape(a, shape)
a = tf.subtract(a,1)
decoded = a*scaler
return decoded
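# A hypothetical matching encoder, reverse-engineered from the decoder above:
# the flat ternary tensor (values in {-1, 0, 1}) is shifted to {0, 1, 2}, split
# into four contiguous quarters, and packed one base-4 digit per quarter into
# each byte. This is a sketch of the assumed producer, not code from the project.
def ternary_encoder(ternary):
    a = ternary + 1                        # shift {-1, 0, 1} to {0, 1, 2}
    n = tf.shape(a)[0]
    pad = tf.mod(4 - tf.mod(n, 4), 4)      # pad length to a multiple of 4
    a = tf.concat([a, tf.zeros(tf.stack([pad]), tf.int32)], 0)
    s1, s2, s3, s4 = tf.split(a, 4, axis=0)
    packed = s1 + 4 * s2 + 16 * s3 + 64 * s4
    return tf.cast(packed, tf.uint8)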
def _crop(image, offset_height, offset_width, crop_height, crop_width):
original_shape = tf.shape(image)
rank_assertion = tf.Assert(
tf.equal(tf.rank(image), 3),
['Rank of image must be equal to 3.'])
cropped_shape = control_flow_ops.with_dependencies(
[rank_assertion],
tf.stack([crop_height, crop_width, original_shape[2]]))
size_assertion = tf.Assert(
tf.logical_and(
tf.greater_equal(original_shape[0], crop_height),
tf.greater_equal(original_shape[1], crop_width)),
['Crop size greater than the image size.'])
offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
# Use tf.slice instead of crop_to_bounding box as it accepts tensors to
# define the crop size.
image = control_flow_ops.with_dependencies([size_assertion], tf.slice(image, offsets, cropped_shape))
return tf.reshape(image, cropped_shape)
def tf_random_aspect_resize(image, label, low_val=1.0, upper_val=1.5):
shape = tf.shape(image)
height = shape[0]
width = shape[1]
# 1~1.5
which_side = tf.to_float(tf.random_uniform([1]))[0]
multi_val = tf.to_float(tf.random_uniform([1]))[0] * (upper_val - low_val) + low_val
new_height = tf.cond(which_side > 0.5, lambda: tf.to_float(height), lambda: tf.to_float(height) * multi_val)
new_width = tf.cond(which_side <= 0.5, lambda: tf.to_float(width), lambda: tf.to_float(width) * multi_val)
new_height = tf.to_int32(new_height)
new_width = tf.to_int32(new_width)
image = tf.expand_dims(image, 0)
label = tf.expand_dims(label, 0)
resized_image = tf.image.resize_bilinear(image, [new_height, new_width], align_corners=False)
resized_image = tf.cast(resized_image, tf.uint8)
resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width], align_corners=False)
resized_label = tf.cast(resized_label, tf.uint8)
resized_image = tf.squeeze(resized_image, 0)
resized_label = tf.squeeze(resized_label, 0)
return resized_image, resized_label
def get_learning_rate_decay(learning_rate, global_step, params):
if params.learning_rate_decay == "noam":
step = tf.to_float(global_step)
warmup_steps = tf.to_float(params.warmup_steps)
multiplier = params.hidden_size ** -0.5
decay = multiplier * tf.minimum((step + 1) * (warmup_steps ** -1.5),
(step + 1) ** -0.5)
return learning_rate * decay
elif params.learning_rate_decay == "piecewise_constant":
return tf.train.piecewise_constant(tf.to_int32(global_step),
params.learning_rate_boundaries,
params.learning_rate_values)
elif params.learning_rate_decay == "none":
return learning_rate
else:
raise ValueError("Unknown learning_rate_decay")
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
"""Runs CTC loss algorithm on each batch element.
# Arguments
y_true: tensor `(samples, max_string_length)` containing the truth labels.
y_pred: tensor `(samples, time_steps, num_categories)` containing the prediction,
or output of the softmax.
input_length: tensor `(samples, 1)` containing the sequence length for
each batch item in `y_pred`.
label_length: tensor `(samples, 1)` containing the sequence length for
each batch item in `y_true`.
# Returns
Tensor with shape (samples,1) containing the
CTC loss of each element
"""
label_length = tf.to_int32(tf.squeeze(label_length))
input_length = tf.to_int32(tf.squeeze(input_length))
sparse_labels = tf.to_int32(ctc_label_dense_to_sparse(y_true, label_length))
y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
return tf.expand_dims(ctc.ctc_loss(inputs=y_pred,
labels=sparse_labels,
sequence_length=input_length), 1)
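# A shape-level usage sketch; ctc_label_dense_to_sparse and the ctc module
# (tensorflow.python.ops.ctc_ops in the Keras backend this comes from) are
# assumed to be in scope.
y_pred = tf.placeholder(tf.float32, [2, 50, 28])  # (samples, time_steps, num_categories)
y_true = tf.placeholder(tf.int32, [2, 10])        # (samples, max_string_length)
input_length = tf.placeholder(tf.int32, [2, 1])
label_length = tf.placeholder(tf.int32, [2, 1])
loss = ctc_batch_cost(y_true, y_pred, input_length, label_length)  # shape (2, 1)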
def image_scaling(img, label):
"""
    Randomly scales the image between 0.5 and 1.5 times the original size.
Args:
img: Training image to scale.
label: Segmentation mask to scale.
"""
scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)
h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale))
w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale))
new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])
img = tf.image.resize_images(img, new_shape)
label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
label = tf.squeeze(label, squeeze_dims=[0])
return img, label
def select_present(x, presence, batch_size=1, name='select_present'):
with tf.variable_scope(name):
presence = 1 - tf.to_int32(presence) # invert mask
bs = x.get_shape()[0]
if bs != None: # here type(bs) is tf.Dimension and == is ok
batch_size = int(bs)
num_partitions = 2 * batch_size
r = tf.range(0, num_partitions, 2)
        r.set_shape([batch_size])
r = broadcast_against(r, presence)
presence += r
selected = tf.dynamic_partition(x, presence, num_partitions)
selected = tf.concat(axis=0, values=selected)
selected = tf.reshape(selected, tf.shape(x))
return selected
def _bbox_to_mask(yy, region_size, dtype):
    # trim bounding box exceeding region_size on top and left
neg_part = tf.nn.relu(-yy[:2])
core = tf.ones(tf.to_int32(tf.round(yy[2:] - neg_part)), dtype=dtype)
y1 = tf.maximum(yy[0], 0.)
x1 = tf.maximum(yy[1], 0.)
y2 = tf.minimum(region_size[0], yy[0] + yy[2])
x2 = tf.minimum(region_size[1], yy[1] + yy[3])
padding = (y1, region_size[0] - y2, x1, region_size[1] - x2)
padding = tf.reshape(tf.stack(padding), (-1, 2))
padding = tf.to_int32(tf.round(padding))
mask = tf.pad(core, padding)
    # trim bounding box exceeding region_size on bottom and right
rs = tf.to_int32(tf.round(region_size))
mask = mask[:rs[0], :rs[1]]
mask.set_shape((None, None))
return mask
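# A concrete sketch: `yy` is [y, x, height, width] in pixels, and the box may
# extend past the region on any side.
yy = tf.constant([2., 3., 4., 5.])            # y, x, height, width
region = tf.constant([10., 10.])
mask = _bbox_to_mask(yy, region, tf.float32)  # 10x10; ones on rows 2-5, cols 3-7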