import numpy as np
import tensorflow as tf


def flip_randomly_left_right_image_with_annotation(image_tensor, annotation_tensor):
    """Accepts image tensor and annotation tensor and returns randomly flipped tensors of both.
    The function performs a random left-right flip of the image and annotation tensors
    with probability 1/2. The flip is applied (or not) to the image and annotation
    consistently, so that the annotation still matches the image.
    Parameters
    ----------
    image_tensor : Tensor of size (width, height, 3)
        Tensor with image
    annotation_tensor : Tensor of size (width, height, 1)
        Tensor with annotation
    Returns
    -------
    randomly_flipped_img : Tensor of size (width, height, 3)
        Randomly flipped image tensor
    randomly_flipped_annotation : Tensor of size (width, height, 1)
        Randomly flipped annotation tensor
    """
    # Random variable: two possible outcomes (0 or 1) with a 1 in 2 chance.
    random_var = tf.random_uniform(maxval=2, dtype=tf.int32, shape=[])

    # The same random draw gates both conds, so the image and annotation
    # are always flipped (or left alone) together.
    randomly_flipped_img = tf.cond(pred=tf.equal(random_var, 0),
                                   true_fn=lambda: tf.image.flip_left_right(image_tensor),
                                   false_fn=lambda: image_tensor)

    randomly_flipped_annotation = tf.cond(pred=tf.equal(random_var, 0),
                                          true_fn=lambda: tf.image.flip_left_right(annotation_tensor),
                                          false_fn=lambda: annotation_tensor)

    return randomly_flipped_img, randomly_flipped_annotation
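
# Usage sketch: wiring the joint flip into a TF1 graph. The zero tensors
# below are hypothetical stand-ins for a real decoded image/annotation pair.
example_image = tf.zeros([256, 256, 3], dtype=tf.uint8)
example_annotation = tf.zeros([256, 256, 1], dtype=tf.uint8)
flipped_image, flipped_annotation = flip_randomly_left_right_image_with_annotation(
    example_image, example_annotation)
with tf.Session() as sess:
    img_out, ann_out = sess.run([flipped_image, flipped_annotation])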

def distort_randomly_image_color(image_tensor, fast_mode=False):
    """Accepts image tensor of (width, height, 3) and returns color distorted image.
    The function performs random brightness, saturation, hue, and contrast changes, as
    is done for Inception model training in TF-Slim (see the link in the comments below).
    All the parameters of the random variables were preserved from the original. The
    function has two regimes: fast and slow. The fast one applies only random brightness
    and saturation changes; the slow one applies all four distortions.
    Parameters
    ----------
    image_tensor : Tensor of size (width, height, 3) of tf.int32 or tf.float
        Tensor with image with range [0,255]
    fast_mode : boolean
        Boolean value representing whether to use fast or slow mode
    Returns
    -------
    img_float_distorted_original_range : Tensor of size (width, height, 3) of type tf.float.
        Image Tensor with distorted color in [0,255] intensity range
    """
    # Make the range to be in [0,1]
    img_float_zero_one_range = tf.to_float(image_tensor) / 255

    # Randomly distort the color of the image. There are 4 orderings in which to do it.
    # Credit: TF-Slim
    # https://github.com/tensorflow/models/blob/master/slim/preprocessing/inception_preprocessing.py#L224
    # Most probably the Inception models were trained using this color augmentation:
    # https://github.com/tensorflow/models/tree/master/slim#pre-trained-models
    distorted_image = apply_with_random_selector(img_float_zero_one_range,
                                                 lambda x, ordering: distort_color(x, ordering, fast_mode=fast_mode),
                                                 num_cases=4)

    img_float_distorted_original_range = distorted_image * 255

    return img_float_distorted_original_range
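
# apply_with_random_selector() and distort_color() above are not defined in
# this snippet; both come from TF-Slim's inception_preprocessing module (see
# the link in the comments). For reference, a sketch of the selector helper
# as it appears in the linked file:
from tensorflow.python.ops import control_flow_ops


def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), where sel is sampled uniformly from [0, num_cases).
    Sketch of the TF-Slim helper referenced above; see the linked
    inception_preprocessing.py for the canonical version (which also
    defines distort_color).
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    # switch() routes the real x into exactly one branch; func is applied to
    # every branch with its case index, and merge() returns the single branch
    # that actually received x.
    return control_flow_ops.merge([
        func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
        for case in range(num_cases)])[0]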

def get_labels_from_annotation(annotation_tensor, class_labels):
    """Returns tensor of size (width, height, num_classes) derived from annotation tensor.
    The function returns a tensor of size (width, height, num_classes) which is derived
    from the annotation tensor of size (width, height), where the value at each position
    represents a class. The function requires a list with class values like [0, 1, 2, 3]
    -- they are used to derive the labels. Derived values are ordered in the same way as
    the class numbers were provided in the list. The last value in that list represents
    the value that indicates that a pixel should be masked out, so
    num_classes := len(class_labels) - 1.
    Parameters
    ----------
    annotation_tensor : Tensor of size (width, height)
        Tensor with class labels for each element
    class_labels : list of ints
        List that contains the numbers that represent classes. Last
        value in the list should represent the number that was used
        for masking out.
    Returns
    -------
    labels_2d_stacked_float : Tensor of size (width, height, num_classes).
        Tensor with labels for each pixel.
    """
    # The last value in the classes list shows which number was used in the
    # annotation to mask out the ambiguous regions or regions that should not
    # be used for training.
    # TODO: probably replace class_labels list with some custom object
    valid_entries_class_labels = class_labels[:-1]

    # Build a binary mask for each class. A bare map() would return an
    # iterator under Python 3, which tf.stack cannot consume, so build a list.
    labels_2d = [tf.equal(annotation_tensor, x)
                 for x in valid_entries_class_labels]

    # Merge all of the binary masks into one matrix
    labels_2d_stacked = tf.stack(labels_2d, axis=2)

    # Convert tf.bool to tf.float.
    # Later on the labels and logits will be used in the
    # tf.softmax_cross_entropy_with_logits() function, where
    # they have to be of the float type.
    labels_2d_stacked_float = tf.to_float(labels_2d_stacked)

    return labels_2d_stacked_float
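
# Worked example: three classes plus a mask-out value. Using 255 as the
# masked label is an assumption (a common convention in segmentation data).
example_annotation = tf.constant([[0, 1],
                                  [2, 255]], dtype=tf.int32)
example_labels = get_labels_from_annotation(example_annotation,
                                            class_labels=[0, 1, 2, 255])
# example_labels has shape (2, 2, 3). The masked pixel becomes all zeros:
# example_labels[1, 1] == [0., 0., 0.], while example_labels[0, 1] == [0., 1., 0.]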

def __init__(self,
             box_specs_list,
             base_anchor_size=None,
             clip_window=None):
    """Constructs a MultipleGridAnchorGenerator.
    To construct anchors at multiple grid resolutions, one must provide a
    list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]) and, for each grid
    size, a corresponding list of (scale, aspect ratio) box specifications.
    For example:
    box_specs_list = [[(.1, 1.0), (.1, 2.0)],  # for 8x8 grid
                      [(.2, 1.0), (.3, 1.0), (.2, 2.0)]]  # for 4x4 grid
    To support the fully convolutional setting, we pass grid sizes in at
    generation time, while scales and aspect ratios are fixed at construction
    time.
    Args:
      box_specs_list: list of list of (scale, aspect ratio) pairs with the
        outside list having the same number of entries as feature_map_shape_list
        (which is passed in at generation time).
      base_anchor_size: base anchor size as [height, width]
        (length-2 float tensor, default=[256, 256]).
      clip_window: a tensor of shape [4] specifying a window to which all
        anchors should be clipped. If clip_window is None, then no clipping
        is performed.
    Raises:
      ValueError: if box_specs_list is not a list of list of pairs
      ValueError: if clip_window is not either None or a tensor of shape [4]
    """
    if isinstance(box_specs_list, list) and all(
            [isinstance(list_item, list) for list_item in box_specs_list]):
        self._box_specs = box_specs_list
    else:
        raise ValueError('box_specs_list is expected to be a '
                         'list of lists of pairs')
    if base_anchor_size is None:
        base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
    self._base_anchor_size = base_anchor_size
    if clip_window is not None and clip_window.get_shape().as_list() != [4]:
        raise ValueError('clip_window must either be None or a shape [4] tensor')
    self._clip_window = clip_window
    self._scales = []
    self._aspect_ratios = []
    for box_spec in self._box_specs:
        if not all([isinstance(entry, tuple) and len(entry) == 2
                    for entry in box_spec]):
            raise ValueError('box_specs_list is expected to be a '
                             'list of lists of pairs')
        scales, aspect_ratios = zip(*box_spec)
        self._scales.append(scales)
        self._aspect_ratios.append(aspect_ratios)
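
# Constructor usage sketch. Only __init__ is shown above; this assumes the
# full MultipleGridAnchorGenerator class (from the TensorFlow Object
# Detection API) is in scope.
box_specs_list = [[(.1, 1.0), (.1, 2.0)],             # for the 8x8 grid
                  [(.2, 1.0), (.3, 1.0), (.2, 2.0)]]  # for the 4x4 grid
anchor_generator = MultipleGridAnchorGenerator(
    box_specs_list,
    base_anchor_size=tf.constant([256, 256], dtype=tf.float32))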

def create_ssd_anchors(num_layers=6,
                       min_scale=0.2,
                       max_scale=0.95,
                       aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
                       base_anchor_size=None,
                       reduce_boxes_in_lowest_layer=True):
    """Creates MultipleGridAnchorGenerator for SSD anchors.
    This function instantiates a MultipleGridAnchorGenerator that reproduces
    the ``default box`` construction proposed by Liu et al. in the SSD paper.
    See Section 2.2 for details. Grid sizes are assumed to be passed in
    at generation time from finest resolution to coarsest resolution --- this is
    used to (linearly) interpolate scales of anchor boxes corresponding to the
    intermediate grid sizes.
    Anchors that are returned by calling the `generate` method on the returned
    MultipleGridAnchorGenerator object are always in normalized coordinates
    and clipped to the unit square (i.e., all coordinates lie in [0, 1]x[0, 1]).
    Args:
      num_layers: integer number of grid layers to create anchors for (actual
        grid sizes passed in at generation time)
      min_scale: scale of anchors corresponding to finest resolution (float)
      max_scale: scale of anchors corresponding to coarsest resolution (float)
      aspect_ratios: list or tuple of (float) aspect ratios to place on each
        grid point.
      base_anchor_size: base anchor size as [height, width].
      reduce_boxes_in_lowest_layer: a boolean indicating whether a fixed set
        of 3 boxes per location is used in the lowest layer.
    Returns:
      a MultipleGridAnchorGenerator
    """
    if base_anchor_size is None:
        base_anchor_size = [1.0, 1.0]
    base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
    box_specs_list = []

    # Linearly interpolate num_layers scales between min_scale and max_scale,
    # with a trailing 1.0 so the coarsest layer has a "next" scale to pair with.
    scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
              for i in range(num_layers)] + [1.0]

    for layer, scale, scale_next in zip(
            range(num_layers), scales[:-1], scales[1:]):
        layer_box_specs = []
        if layer == 0 and reduce_boxes_in_lowest_layer:
            layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
        else:
            for aspect_ratio in aspect_ratios:
                layer_box_specs.append((scale, aspect_ratio))
                # For the unit aspect ratio, also add an anchor at the
                # geometric mean of this layer's scale and the next one.
                if aspect_ratio == 1.0:
                    layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
        box_specs_list.append(layer_box_specs)
    return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
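
# Usage sketch: with the defaults, the interpolated scales work out to
# [0.2, 0.35, 0.5, 0.65, 0.8, 0.95] + [1.0], where the trailing 1.0 is
# used only to pair with 0.95 for the geometric-mean anchor.
ssd_anchor_generator = create_ssd_anchors(num_layers=6,
                                          min_scale=0.2,
                                          max_scale=0.95)
# Grid sizes are supplied at generation time, finest to coarsest. The
# generate() method is not shown in this snippet; in the TensorFlow Object
# Detection API it takes a feature_map_shape_list, e.g.:
#   anchors = ssd_anchor_generator.generate(
#       feature_map_shape_list=[(38, 38), (19, 19), (10, 10),
#                               (5, 5), (3, 3), (1, 1)])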