def _roi_pool_layer(self, bootom, rois, name):
    """Apply ROI pooling to a feature map inside a variable scope called `name`.

    Args:
        bootom: First argument handed to `tf.image.roi_pooling` (presumably
            the bottom feature map; the spelling is kept for callers).
        rois: Regions of interest handed to the pooling op.
        name: Name of the variable scope that wraps the op.

    Returns:
        Element 0 of whatever `tf.image.roi_pooling` returns.
    """
    # NOTE(review): `tf.image.roi_pooling` does not appear to be part of the
    # public tf.image API; presumably a custom TensorFlow build -- confirm.
    with tf.variable_scope(name):
        pooled_outputs = tf.image.roi_pooling(
            bootom,
            rois,
            pooled_height=cfg.POOLING_SIZE,
            pooled_width=cfg.POOLING_SIZE,
            # NOTE(review): 1/16 presumably mirrors the backbone stride -- confirm.
            spatial_scale=1. / 16.)
        return pooled_outputs[0]
# Example source code for the Python class image()
def _build_network(self, is_training=True):
    """Assemble the detection graph and return its main outputs.

    The steps are: image-to-head features, anchors, region proposals,
    crop-based ROI pooling, head-to-tail features and region classification.

    Args:
        is_training: Forwarded to every sub-network builder.

    Returns:
        A tuple `(rois, cls_prob, bbox_pred)`.

    Raises:
        NotImplementedError: if `cfg.POOLING_MODE` is anything but 'crop'.
    """
    # Choose the initializer family once; both variants share mean 0 and only
    # differ in their standard deviation.
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Anchors for the image, then the region proposal network.
        self._anchor_component()
        rois = self._region_proposal(net_conv, is_training, initializer)
        # Region-of-interest pooling: only the 'crop' mode is available.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    # NOTE(review): the tail is built outside the variable scope, as implied
    # by the scope being re-entered right below -- confirm the nesting.
    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Region classification.
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    # Make every prediction available to the score summaries.
    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def test_image(self, sess, image, im_info):
    """Run the prediction tensors for one image.

    Args:
        sess: Session-like object whose `run` method evaluates the fetches.
        image: Value fed into `self._image`.
        im_info: Value fed into `self._im_info`.

    Returns:
        A tuple `(cls_score, cls_prob, bbox_pred, rois)` as produced by
        `sess.run`.
    """
    feeds = {
        self._image: image,
        self._im_info: im_info,
    }
    fetches = [
        self._predictions["cls_score"],
        self._predictions['cls_prob'],
        self._predictions['bbox_pred'],
        self._predictions['rois'],
    ]
    scores, probabilities, box_deltas, proposals = sess.run(fetches, feed_dict=feeds)
    return scores, probabilities, box_deltas, proposals
def read_images_from_disk(input_queue):
    """Read and decode the image named by the first entry of `input_queue`.

    Adapted from
    http://stackoverflow.com/questions/34340489/tensorflow-read-images-with-labels

    Args:
        input_queue: Indexable sequence whose element 0 is the image file
            name, element 1 is returned as `alphas`, and last element is
            returned as the label. For a two-element sequence, `alphas` and
            the label are therefore the same entry.

    Returns:
        Three values: the decoded image, `alphas`, and the label.
    """
    filename = input_queue[0]
    alphas = input_queue[1]
    label = input_queue[-1]
    raw_bytes = tf.read_file(filename)
    decoded = tf.image.decode_image(raw_bytes, channels=NUM_CHANNELS)
    return decoded, alphas, label
def subtract_mean(image_tensor, mean_image_path, image_size=512):
    """Subtract a mean image, loaded from disk, from `image_tensor`.

    Args:
        image_tensor: Image tensor. Its static shape is set in place to
            `[image_size, image_size, NUM_CHANNELS]`.
        mean_image_path: Path of the mean-image file (converted to a string
            tensor).
        image_size: Side length used for the static shape of both images.

    Returns:
        A float32 tensor: `image_tensor` minus the decoded mean image.
    """
    static_shape = [image_size, image_size, NUM_CHANNELS]

    mean_path = tf.convert_to_tensor(mean_image_path, dtype=tf.string)
    # decode_image leaves the static shape unknown, so pin it explicitly.
    image_tensor.set_shape(static_shape)
    image_float = tf.cast(image_tensor, tf.float32)

    # Load and decode the mean image, then subtract it.
    mean_bytes = tf.read_file(mean_path)
    mean_decoded = tf.image.decode_image(mean_bytes, channels=NUM_CHANNELS)
    mean_decoded.set_shape(static_shape)
    mean_float = tf.cast(mean_decoded, tf.float32)
    return tf.subtract(image_float, mean_float)
def inputs(image_list, label_list, batch_size, mean_image_path, image_size=512):
    """Build a batched, mean-subtracted image/label input pipeline.

    Args:
        image_list: Image file names; converted to a string tensor.
        label_list: Labels; converted to an int32 tensor.
        batch_size: Number of examples per batch. Queue capacities are set to
            ten times this value.
        mean_image_path: Path of the mean-image file, forwarded to
            `subtract_mean`.
        image_size: Forwarded to `subtract_mean`.

    Returns:
        The result of `tf.train.batch` over `[mean_free_image, label]`.
    """
    filenames = tf.convert_to_tensor(image_list, dtype=tf.string)
    targets = tf.convert_to_tensor(label_list, dtype=tf.int32)
    queue_capacity = 10 * batch_size

    # Shuffled queue that yields one (filename, label) pair at a time.
    pair_queue = tf.train.slice_input_producer(
        [filenames, targets], shuffle=True, capacity=queue_capacity)

    # The middle return value is unused here.
    decoded_image, _, label = read_images_from_disk(pair_queue)
    mean_free = subtract_mean(decoded_image, mean_image_path, image_size=image_size)

    # Optional preprocessing / data augmentation (tf.image implements most of
    # the standard image augmentations) would go here.

    # Group single examples into batches using several reader threads.
    reader_threads = 10
    return tf.train.batch(
        [mean_free, label],
        batch_size=batch_size,
        capacity=queue_capacity,
        num_threads=reader_threads)
def inputs_with_alphas(image_list, alphas_list, label_list, batch_size, mean_image_path,
                       image_size=512):
    """Build a batched, mean-subtracted image/alpha/label input pipeline.

    Args:
        image_list: Image file names; converted to a string tensor.
        alphas_list: Per-example alpha values; converted to a float32 tensor.
        label_list: Labels; converted to an int32 tensor.
        batch_size: Number of examples per batch.
        mean_image_path: Path of the mean-image file.
        image_size: Forwarded to `subtract_mean`. Defaults to 512, which is
            the value that was previously used implicitly, and mirrors the
            parameter of the sibling `inputs` function.

    Returns:
        The result of `tf.train.batch` over `[mean_free_image, alpha, label]`.
    """
    images = tf.convert_to_tensor(image_list, dtype=tf.string)
    labels = tf.convert_to_tensor(label_list, dtype=tf.int32)
    alphas = tf.convert_to_tensor(alphas_list, dtype=tf.float32)
    mean_image = tf.convert_to_tensor(mean_image_path, dtype=tf.string)

    # Shuffled queue that yields one (filename, alpha, label) triple at a time.
    input_queue = tf.train.slice_input_producer([images, alphas, labels],
                                                shuffle=True)

    uint8image, alpha, label = read_images_from_disk(input_queue)
    image_mean_free = subtract_mean(uint8image, mean_image, image_size=image_size)

    # Optional preprocessing / data augmentation (tf.image implements most of
    # the standard image augmentations) would go here.

    # Group single examples into batches using several reader threads.
    num_preprocess_threads = 4
    return tf.train.batch([image_mean_free, alpha, label],
                          batch_size=batch_size,
                          num_threads=num_preprocess_threads)
def _add_gt_image(self):
    """Rebuild a displayable image from the network input and store it in
    `self._gt_image`."""
    # Add back the pixel means.
    restored = self._image + cfg.PIXEL_MEANS
    # Target size: first two entries of im_info divided by the third
    # (presumably height/width over the resize scale -- confirm).
    target_size = tf.to_int32(self._im_info[:2] / self._im_info[2])
    rescaled = tf.image.resize_bilinear(restored, target_size)
    # BGR to RGB (opencv uses BGR): reverse the last axis.
    self._gt_image = tf.reverse(rescaled, axis=[-1])
def _add_gt_image_summary(self):
    """Create an image summary named 'GROUND_TRUTH' with boxes drawn on it.

    Returns:
        The summary produced by `tf.summary.image`.
    """
    # Build the ground-truth image lazily on first use.
    if self._gt_image is None:
        self._add_gt_image()
    # A custom Python function draws the boxes, wrapped as a graph op.
    drawing_inputs = [self._gt_image, self._gt_boxes, self._im_info]
    annotated = tf.py_func(draw_bounding_boxes,
                           drawing_inputs,
                           tf.float32,
                           name="gt_boxes")
    return tf.summary.image('GROUND_TRUTH', annotated)
def _roi_pool_layer(self, bootom, rois, name):
    """Apply ROI pooling to a feature map inside a variable scope called `name`.

    Args:
        bootom: First argument handed to `tf.image.roi_pooling` (presumably
            the bottom feature map; the spelling is kept for callers).
        rois: Regions of interest handed to the pooling op.
        name: Name of the variable scope that wraps the op.

    Returns:
        Element 0 of whatever `tf.image.roi_pooling` returns.
    """
    # NOTE(review): `tf.image.roi_pooling` does not appear to be part of the
    # public tf.image API; presumably a custom TensorFlow build -- confirm.
    with tf.variable_scope(name):
        pooled_outputs = tf.image.roi_pooling(
            bootom,
            rois,
            pooled_height=cfg.POOLING_SIZE,
            pooled_width=cfg.POOLING_SIZE,
            # NOTE(review): 1/16 presumably mirrors the backbone stride -- confirm.
            spatial_scale=1. / 16.)
        return pooled_outputs[0]
def _build_network(self, is_training=True):
    """Assemble the detection graph and return its main outputs.

    The steps are: image-to-head features, anchors, region proposals,
    crop-based ROI pooling, head-to-tail features and region classification.

    Args:
        is_training: Forwarded to every sub-network builder.

    Returns:
        A tuple `(rois, cls_prob, bbox_pred)`.

    Raises:
        NotImplementedError: if `cfg.POOLING_MODE` is anything but 'crop'.
    """
    # Choose the initializer family once; both variants share mean 0 and only
    # differ in their standard deviation.
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Anchors for the image, then the region proposal network.
        self._anchor_component()
        rois = self._region_proposal(net_conv, is_training, initializer)
        # Region-of-interest pooling: only the 'crop' mode is available.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    # NOTE(review): the tail is built outside the variable scope, as implied
    # by the scope being re-entered right below -- confirm the nesting.
    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Region classification.
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    # Make every prediction available to the score summaries.
    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def test_image(self, sess, image, im_info):
    """Run the prediction tensors for one image.

    This variant fetches the "cls_score_n" and "bbox_pred_n" entries of
    `self._predictions` for the score and box outputs.

    Args:
        sess: Session-like object whose `run` method evaluates the fetches.
        image: Value fed into `self._image`.
        im_info: Value fed into `self._im_info`.

    Returns:
        A tuple `(cls_score, cls_prob, bbox_pred, rois)` as produced by
        `sess.run`.
    """
    feeds = {
        self._image: image,
        self._im_info: im_info,
    }
    fetches = [
        self._predictions["cls_score_n"],
        self._predictions['cls_prob'],
        self._predictions['bbox_pred_n'],
        self._predictions['rois'],
    ]
    scores, probabilities, box_deltas, proposals = sess.run(fetches, feed_dict=feeds)
    return scores, probabilities, box_deltas, proposals
def central_crop(images, central_fraction):
    """Crop the central region of the image(s).

    (A mirror to tf.image central_crop)

    Remove the outer parts of an image but retain the central region of the
    image along each dimension. If we specify central_fraction = 0.5, this
    function returns the region marked with "X" in the below diagram.

    ```
     --------
    |........|
    |..XXXX..|
    |..XXXX..|
    |........|   where "X" is the central 50% of the image.
     --------
    ```

    Args:
        images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
        central_fraction: float (0, 1], fraction of size to crop.

    Raises:
        ValueError: if `images` has more than 4 dimensions. An out-of-range
            `central_fraction` is presumably rejected by
            `tf.image.central_crop` itself -- this wrapper does not check it.

    Returns:
        If `images` was 4-D, a 4-D float Tensor of shape
        `[batch, new_height, new_width, channels]`.
        If `images` was 3-D, a 3-D float Tensor of shape
        `[new_height, new_width, channels]`.
    """
    images_shape = get_shape(images)
    if len(images_shape) > 4:
        # The exception used to be constructed but never raised, so invalid
        # ranks silently fell through to the TensorFlow op.
        raise ValueError("'image' must have either 3 or 4 dimensions, "
                         "received `{}`.".format(images_shape))
    if len(images_shape) == 4:
        # tf.image.central_crop is applied per image across the batch axis.
        return tf.map_fn(lambda img: tf.image.central_crop(img, central_fraction), images)
    return tf.image.central_crop(images, central_fraction)
def transpose(images):
    """Transpose an image/images by swapping the first and second dimension.

    (A mirror to tf.image transpose_image)

    Args:
        images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.

    Returns:
        If `images` was 4-D, a 4-D float Tensor of shape
        `[batch, target_height, target_width, channels]`.
        If `images` was 3-D, a 3-D float Tensor of shape
        `[target_height, target_width, channels]`.

    Raises:
        ValueError: if `images` has more than 4 dimensions.
    """
    images_shape = get_shape(images)
    if len(images_shape) > 4:
        # The exception used to be constructed but never raised.
        raise ValueError("'image' must have either 3 or 4 dimensions, "
                         "received `{}`.".format(images_shape))
    if len(images_shape) == 4:
        # Apply the single-image op to every element of the batch.
        return tf.map_fn(tf.image.transpose_image, images)
    return tf.image.transpose_image(images)
def rotate90(images, k=1, is_random=False, seed=None, name=None):
    """Rotate (randomly) images counter-clockwise by 90 degrees.

    (A mirror to tf.image rot90)

    Args:
        images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
        k: A scalar integer. The number of times the image is rotated by 90
            degrees. Ignored when `is_random` is True.
        is_random: `bool`, If True, `k` is replaced by a value drawn from
            `[0, 1, 2, 3]` via a random shuffle.
        seed: A Python integer. Used to create a random seed.
            See @{tf.set_random_seed}.
        name: A name for this operation (optional).

    Returns:
        If `images` was 4-D, a 4-D float Tensor of shape
        `[batch, target_height, target_width, channels]`.
        If `images` was 3-D, a 3-D float Tensor of shape
        `[target_height, target_width, channels]`.

    Raises:
        ValueError: if `images` has more than 4 dimensions.
    """
    if is_random:
        # Shuffle the four possible rotation counts and take the first one.
        k = random_ops.random_shuffle([0, 1, 2, 3], seed=seed)[0]
    images_shape = get_shape(images)
    if len(images_shape) > 4:
        # The exception used to be constructed but never raised.
        raise ValueError("'image' must have either 3 or 4 dimensions, "
                         "received `{}`.".format(images_shape))
    if len(images_shape) == 4:
        # The same `k` is used for every image in the batch.
        return tf.map_fn(lambda img: tf.image.rot90(img, k, name), images)
    return tf.image.rot90(images, k, name)
def convert_images_dtype(images, dtype, saturate=False, name=None):
    """Convert image(s) to `dtype`, scaling its values if needed.

    (A mirror to tf.image convert_image_dtype)

    Images that are represented using floating point values are expected to
    have values in the range [0,1). Image data stored in integer data types
    are expected to have values in the range `[0,MAX]`, where `MAX` is the
    largest positive representable number for the data type.

    This op converts between data types, scaling the values appropriately
    before casting.

    Note that converting from floating point inputs to integer types may lead
    to over/underflow problems. Set saturate to `True` to avoid such problem
    in problematic conversions. If enabled, saturation will clip the output
    into the allowed range before performing a potentially dangerous cast
    (and only before performing such a cast, i.e., when casting from a
    floating point to an integer type, and when casting from a signed to an
    unsigned type; `saturate` has no effect on casts between floats, or on
    casts that increase the type's range).

    Args:
        images: An image or a 4-D batch of images.
        dtype: A `DType` to convert `images` to.
        saturate: If `True`, clip the input before casting (if necessary).
        name: A name for this operation (optional).

    Returns:
        `images`, converted to `dtype`.

    Raises:
        ValueError: if `images` has more than 4 dimensions.
    """
    images_shape = get_shape(images)
    if len(images_shape) > 4:
        # The exception used to be constructed but never raised.
        raise ValueError("'image' must have either 3 or 4 dimensions, "
                         "received `{}`.".format(images_shape))
    if len(images_shape) == 4:
        # map_fn assumes the output dtype equals the input dtype unless told
        # otherwise; a dtype conversion changes it, so declare it explicitly.
        return tf.map_fn(
            lambda img: tf.image.convert_image_dtype(
                img, dtype=dtype, saturate=saturate, name=name),
            images,
            dtype=tf.as_dtype(dtype))
    return tf.image.convert_image_dtype(images, dtype=dtype, saturate=saturate, name=name)
def adjust_brightness(images, delta, is_random=False, seed=None):
    """Adjust (randomly) the brightness of RGB or Grayscale images.

    (A mirror to tf.image adjust_brightness, random_brightness)

    This is a convenience method that converts an RGB image to float
    representation, adjusts its brightness, and then converts it back to the
    original data type. If several adjustments are chained it is advisable to
    minimize the number of redundant conversions.

    The value `delta` is added to all components of the tensor `images`. Both
    `images` and `delta` are converted to `float` before adding (and `images`
    is scaled appropriately if it is in fixed-point representation). For
    regular images, `delta` should be in the range `[0,1)`, as it is added to
    the image in floating point representation, where pixel values are in the
    `[0,1)` range.

    If `is_random` is `True`, adjust brightness using a value randomly picked
    in the interval `[-delta, delta)`.

    Args:
        images: A tensor.
        delta: `float`. Amount to add to the pixel values.
        is_random: `bool`, If True, adjust randomly.
        seed: A Python integer. Used to create a random seed.
            See @{tf.set_random_seed}. Only used when `is_random` is True.

    Returns:
        A brightness-adjusted tensor of the same shape and type as `images`.
    """
    # Deterministic path first; the random variant treats `delta` as the
    # maximum magnitude of the adjustment.
    if not is_random:
        return tf.image.adjust_brightness(images, delta=delta)
    return tf.image.random_brightness(images, max_delta=delta, seed=seed)
def adjust_hue(images, delta, is_random=False, seed=None, name=None):
    """Adjust (randomly) hue of an RGB images.

    (A mirror to tf.image adjust_hue, random_hue)

    This is a convenience method that converts an RGB image to float
    representation, converts it to HSV, add an offset to the hue channel,
    converts back to RGB and then back to the original data type. If several
    adjustments are chained it is advisable to minimize the number of
    redundant conversions.

    `images` is an RGB image. The image hue is adjusted by converting the
    image to HSV and rotating the hue channel (H) by `delta`.
    The image is then converted back to RGB.

    `delta` must be in the interval `[-1, 1]`.

    If `is_random` is `True` adjust hue but uses a value randomly picked in
    the interval `[-delta, delta]`.

    Args:
        images: RGB image or images. Size of the last dimension must be 3.
        delta: float. How much to add to the hue channel.
        is_random: `bool`, If True, adjust randomly.
        seed: A Python integer. Used to create a random seed.
            See @{tf.set_random_seed}. Only used when `is_random` is True.
        name: A name for this operation (optional). Only used when
            `is_random` is False.

    Returns:
        Adjusted image(s), same shape and DType as `images`.
    """
    if is_random:
        # NOTE(review): tf.image.random_hue presumably restricts max_delta to
        # a narrower range than [-1, 1] -- confirm against the TF docs.
        return tf.image.random_hue(images, max_delta=delta, seed=seed)
    # tf.image.adjust_hue names its first parameter `image`, so passing
    # `images=` as a keyword raised a TypeError; pass it positionally.
    return tf.image.adjust_hue(images, delta=delta, name=name)
def call(self, inputs, **kwargs):
    """Apply `adjust_gamma` to `inputs` using this object's gamma and gain.

    Args:
        inputs: Value passed to `adjust_gamma` as `image`.
        **kwargs: Accepted but not used.

    Returns:
        Whatever `adjust_gamma` returns.
    """
    gamma_corrected = adjust_gamma(image=inputs, gamma=self.gamma, gain=self.gain)
    return gamma_corrected
def standardize(images):
    """Linearly scales `images` to have zero mean and unit norm.

    (A mirror to tf.image per_image_standardization)

    This op computes `(x - mean) / adjusted_stddev`, where `mean` is the
    average of all values in image, and
    `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

    `stddev` is the standard deviation of all values in `image`. It is capped
    away from zero to protect against division by 0 when handling uniform
    images.

    Args:
        images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.

    Returns:
        The standardized image(s) with same shape as `images`.

    Raises:
        ValueError: if `images` has more than 4 dimensions.
    """
    images_shape = get_shape(images)
    if len(images_shape) > 4:
        # The exception used to be constructed but never raised.
        raise ValueError("'image' must have either 3 or 4 dimensions, "
                         "received `{}`.".format(images_shape))
    if len(images_shape) == 4:
        # Standardize each image of the batch independently.
        return tf.map_fn(tf.image.per_image_standardization, images)
    return tf.image.per_image_standardization(images)