def _random_crop_and_resize_image(cls, image, bbox, height, width,
val=False):
with tf.name_scope('random_crop_and_resize'):
if not val:
# bbox_begin, bbox_size, distorted_bbox = \
bbox_begin, bbox_size, _ = \
tf.image.sample_distorted_bounding_box(
tf.shape(image),
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.8, 1.25],
area_range=[0.1, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
# Crop the image to the distorted bounding box
image = tf.slice(image, bbox_begin, bbox_size)
# Resize to the desired output size
image = tf.image.resize_images(
image,
[height, width],
tf.image.ResizeMethod.BILINEAR,
align_corners=False)
image.set_shape([height, width, 3])
return image
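A rough usage sketch (the `ImagePreprocessor` class name and the 224x224 output size are assumptions, not from the source): when no object annotations exist, a single box covering the whole image can be passed, and use_image_if_no_bounding_boxes=True makes the sampler fall back to the full frame.
import tensorflow as tf

image = tf.placeholder(tf.uint8, shape=[None, None, 3])
# One box spanning the whole image, shape [1, 1, 4] as (ymin, xmin, ymax, xmax).
whole_image_bbox = tf.constant([[[0.0, 0.0, 1.0, 1.0]]], dtype=tf.float32)
# `ImagePreprocessor` is a hypothetical host class for the classmethod above.
train_crop = ImagePreprocessor._random_crop_and_resize_image(
    image, whole_image_bbox, height=224, width=224, val=False)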
def _dynamic_crop(cls, inputs, static_shape, dynamic_shape, data_format='channels_last'):
input_shape = cls.spatial_shape(inputs, data_format, True)
n_channels = cls.num_channels(inputs, data_format)
if data_format == 'channels_last':
slice_size = [(-1,), dynamic_shape, (n_channels,)]
output_shape = [None] * (len(static_shape) + 1) + [n_channels]
else:
slice_size = [(-1, n_channels), dynamic_shape]
output_shape = [None, n_channels] + [None] * len(static_shape)
begin = [0] * len(inputs.get_shape().as_list())
size = tf.concat(slice_size, axis=0)
cond = tf.reduce_sum(tf.abs(input_shape - dynamic_shape)) > 0
x = tf.cond(cond, lambda: tf.slice(inputs, begin=begin, size=size), lambda: inputs)
x.set_shape(output_shape)
return x
def total_variation(image_batch):
"""
:param image_batch: A 4D tensor of shape [batch_size, width, height, channels]
"""
batch_shape = image_batch.get_shape().as_list()
width = batch_shape[1]
left = tf.slice(image_batch, [0, 0, 0, 0], [-1, width - 1, -1, -1])
right = tf.slice(image_batch, [0, 1, 0, 0], [-1, -1, -1, -1])
height = batch_shape[2]
top = tf.slice(image_batch, [0, 0, 0, 0], [-1, -1, height - 1, -1])
bottom = tf.slice(image_batch, [0, 0, 1, 0], [-1, -1, -1, -1])
# left and right are 1 less wide than the original, top and bottom 1 less tall
# In order to combine them, we take 1 off the height of left-right, and 1 off width of top-bottom
horizontal_diff = tf.slice(tf.subtract(left, right), [0, 0, 0, 0], [-1, -1, height - 1, -1])
vertical_diff = tf.slice(tf.subtract(top, bottom), [0, 0, 0, 0], [-1, width - 1, -1, -1])
sum_of_pixel_diffs_squared = tf.add(tf.square(horizontal_diff), tf.square(vertical_diff))
total_variation = tf.reduce_sum(tf.sqrt(sum_of_pixel_diffs_squared))
# TODO: Should this be normalized by the number of pixels?
return total_variation
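A quick sanity sketch (shapes invented): the function reads width and height from get_shape(), so it needs a batch with a fully defined static shape.
import tensorflow as tf

batch = tf.random_uniform([4, 64, 64, 3])  # static shape required by get_shape() above
tv_loss = total_variation(batch)
with tf.Session() as sess:
    print(sess.run(tv_loss))  # a single scalar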
def style_loss(self, layers):
activations = [self.activations_for_layer(i) for i in layers]
gramians = [self.gramian_for_layer(x) for x in layers]
# Slices are for style and synth image
gramian_diffs = [
tf.subtract(
tf.tile(tf.slice(g, [0, 0, 0], [self.num_style, -1, -1]), [self.num_synthesized - self.num_style + 1, 1, 1]),
tf.slice(g, [self.num_style + self.num_content, 0, 0], [self.num_synthesized, -1, -1]))
for g in gramians]
Ns = [g.get_shape().as_list()[2] for g in gramians]
Ms = [a.get_shape().as_list()[1] * a.get_shape().as_list()[2] for a in activations]
scaled_diffs = [tf.square(g) for g in gramian_diffs]
style_loss = tf.div(
tf.add_n([tf.div(tf.reduce_sum(x), 4 * (N ** 2) * (M ** 2)) for x, N, M in zip(scaled_diffs, Ns, Ms)]),
len(layers))
return style_loss
def _crop(image, offset_height, offset_width, crop_height, crop_width):
original_shape = tf.shape(image)
rank_assertion = tf.Assert(
tf.equal(tf.rank(image), 3),
['Rank of image must be equal to 3.'])
cropped_shape = control_flow_ops.with_dependencies(
[rank_assertion],
tf.stack([crop_height, crop_width, original_shape[2]]))
size_assertion = tf.Assert(
tf.logical_and(
tf.greater_equal(original_shape[0], crop_height),
tf.greater_equal(original_shape[1], crop_width)),
['Crop size greater than the image size.'])
offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
# Use tf.slice instead of crop_to_bounding_box as it accepts tensors to
# define the crop size.
image = control_flow_ops.with_dependencies(
[size_assertion],
tf.slice(image, offsets, cropped_shape))
return tf.reshape(image, cropped_shape)
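A small hedged usage with invented offsets and sizes; note that this snippet relies on `control_flow_ops`, which in TF 1.x is importable from `tensorflow.python.ops`.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops  # provides with_dependencies used above

image = tf.placeholder(tf.uint8, shape=[None, None, 3])
patch = _crop(image, offset_height=10, offset_width=20, crop_height=224, crop_width=224)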
def get_image_summary(img, idx=0):
"""
Make an image summary for 4d tensor image with index idx
"""
V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
V -= tf.reduce_min(V)
V /= tf.reduce_max(V)
V *= 255
img_w = tf.shape(img)[1]
img_h = tf.shape(img)[2]
V = tf.reshape(V, tf.stack((img_w, img_h, 1)))
V = tf.transpose(V, (2, 0, 1))
V = tf.reshape(V, tf.stack((-1, img_w, img_h, 1)))
return V
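A hedged TensorBoard hook-up (tensor name and shape are made up): the reshaped single-channel slice is a valid [batch, height, width, 1] input for tf.summary.image.
import tensorflow as tf

activation = tf.random_uniform([1, 32, 32, 8])
image_summary = tf.summary.image('activation_ch0', get_image_summary(activation, idx=0))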
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bboxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
# Won't be back-propagated to rois anyway, but to save time
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))
if cfg.RESNET.MAX_POOL:
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size],
name="crops")
crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
else:
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [cfg.POOLING_SIZE, cfg.POOLING_SIZE],
name="crops")
return crops
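For reference, a small illustration (the numbers are invented) of the ROI layout these slices assume: each row of rois is [batch_index, x1, y1, x2, y2] in input-image coordinates, and tf.slice pulls out one column at a time.
import tensorflow as tf

rois = tf.constant([[0., 10., 20., 110., 120.],
                    [1., 30., 40., 130., 140.]])
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1]), [1])  # -> [0., 1.]
x1 = tf.slice(rois, [0, 1], [-1, 1])                          # -> [[10.], [30.]]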
# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes 0, sometimes 1
def _crop_pool_layer(self, bottom, rois, name):
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bounding boxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
# Won't be back-propagated to rois anyway, but to save time
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")
return slim.max_pool2d(crops, [2, 2], padding='SAME')
def build_discriminator(x_data, x_generated, keep_prob):
x_data = tf.unstack(x_data, seq_size, 1)
x_generated = list(x_generated)
x_in = tf.concat([x_data, x_generated], 1)
x_in = tf.unstack(x_in, seq_size, 0)
lstm_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(n_hidden), output_keep_prob=keep_prob) for _ in range(d_num_layers)])
with tf.variable_scope("dis") as dis:
weights = tf.Variable(tf.random_normal([n_hidden, 1]))
biases = tf.Variable(tf.random_normal([1]))
outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x_in, dtype=tf.float32)
res = tf.matmul(outputs[-1], weights) + biases
y_data = tf.nn.sigmoid(tf.slice(res, [0, 0], [batch_size, -1], name=None))
y_generated = tf.nn.sigmoid(tf.slice(res, [batch_size, 0], [-1, -1], name=None))
d_params = [v for v in tf.global_variables() if v.name.startswith(dis.name)]
with tf.name_scope("desc_params"):
for param in d_params:
variable_summaries(param)
return y_data, y_generated, d_params
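A toy sketch of the real/fake split done with tf.slice above (batch_size and the score shape are placeholders): the first batch_size rows of res score real sequences, the remaining rows score generated ones.
import tensorflow as tf

batch_size = 4
res = tf.random_normal([2 * batch_size, 1])             # stand-in for the LSTM scores
y_data = tf.slice(res, [0, 0], [batch_size, -1])         # rows 0 .. batch_size-1
y_generated = tf.slice(res, [batch_size, 0], [-1, -1])   # remaining rows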
def random_shift(self, images, states, actions):
print('shifting the video sequence randomly in time')
tshift = 2
uselen = self.conf['use_len']
fulllength = self.conf['sequence_length']
nshifts = (fulllength - uselen) // 2 + 1  # integer division so the bound stays an int for tf.random_uniform
rand_ind = tf.random_uniform([1], 0, nshifts, dtype=tf.int64)
self.rand_ind = rand_ind
start = tf.concat(axis=0,values=[tf.zeros(1, dtype=tf.int64), rand_ind * tshift, tf.zeros(3, dtype=tf.int64)])
images_sel = tf.slice(images, start, [-1, uselen, -1, -1, -1])
start = tf.concat(axis=0, values=[tf.zeros(1, dtype=tf.int64), rand_ind * tshift, tf.zeros(1, dtype=tf.int64)])
actions_sel = tf.slice(actions, start, [-1, uselen, -1])
start = tf.concat(axis=0, values=[tf.zeros(1, dtype=tf.int64), rand_ind * tshift, tf.zeros(1, dtype=tf.int64)])
states_sel = tf.slice(states, start, [-1, uselen, -1])
return images_sel, states_sel, actions_sel
def __init__(self, conf, gpu_id, start_images, actions, start_states, pix_distrib1,pix_distrib2):
nsmp_per_gpu = conf['batch_size'] // conf['ngpu']  # integer division: used as a slice size below
# picking different subset of the actions for each gpu
startidx = gpu_id * nsmp_per_gpu
actions = tf.slice(actions, [startidx, 0, 0], [nsmp_per_gpu, -1, -1])
start_images = tf.slice(start_images, [startidx, 0, 0, 0, 0], [nsmp_per_gpu, -1, -1, -1, -1])
start_states = tf.slice(start_states, [startidx, 0, 0], [nsmp_per_gpu, -1, -1])
pix_distrib1 = tf.slice(pix_distrib1, [startidx, 0, 0, 0, 0], [nsmp_per_gpu, -1, -1, -1, -1])
pix_distrib2 = tf.slice(pix_distrib2, [startidx, 0, 0, 0, 0], [nsmp_per_gpu, -1, -1, -1, -1])
print('start index for gpu {0}: {1}'.format(gpu_id, startidx))
from prediction_train_sawyer import Model
if 'ndesig' in conf:
self.model = Model(conf, start_images, actions, start_states, pix_distrib=pix_distrib1,pix_distrib2=pix_distrib2, inference=True)
# self.model = Model(conf, start_images, actions, start_states, pix_distrib=pix_distrib1,
# pix_distrib2=pix_distrib2,
# reuse_scope=reuse_scope)
else:
# self.model = Model(conf,start_images,actions,start_states, pix_distrib=pix_distrib1, reuse_scope= reuse_scope)
self.model = Model(conf, start_images, actions, start_states, pix_distrib=pix_distrib1, inference=True)
def _attention(query, attn_states, is_training, reuse, attn_size, attn_vec_size, attn_length, trainable=True, name='attention'):
with tf.variable_scope(name, reuse=reuse):
v = tf.get_variable(
name="V", shape=[attn_vec_size], trainable=trainable)
attn_states_reshaped = tf.reshape(
attn_states, shape=[-1, attn_length, 1, attn_size])
attn_conv = conv2d(attn_states_reshaped, attn_vec_size, is_training, reuse, filter_size=(
1, 1), stride=(1, 1), trainable=trainable, use_bias=False)
y = _linear(query, attn_vec_size, reuse)
y = tf.reshape(y, [-1, 1, 1, attn_vec_size])
s = tf.reduce_sum(v * tf.tanh(attn_conv + y), [2, 3])
a = softmax(s)
d = tf.reduce_sum(tf.reshape(
a, [-1, attn_length, 1, 1]) * attn_states_reshaped, [1, 2])
new_attns = tf.reshape(d, [-1, attn_size])
new_attn_states = tf.slice(attn_states, [0, 1, 0], [-1, -1, -1])
return new_attns, new_attn_states
def crop_and_concat(inputs1, inputs2, name='crop_concat'):
"""Concates two features maps
concates different sizes feature maps cropping the larger map
concatenation across output channels
Args:
inputs1: A `Tensor`
inputs2: A `Tensor`
Returns:
concated output tensor
"""
with tf.name_scope(name):
inputs1_shape = tf.shape(inputs1)
inputs2_shape = tf.shape(inputs2)
# offsets for the top left corner of the crop
offsets = [0, (inputs1_shape[1] - inputs2_shape[1]) // 2,
(inputs1_shape[2] - inputs2_shape[2]) // 2, 0]
size = [-1, inputs2_shape[1], inputs2_shape[2], -1]
inputs1_crop = tf.slice(inputs1, offsets, size)
return tf.concat([inputs1_crop, inputs2], axis=3)
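An illustrative call with arbitrary shapes: the larger encoder map is centre-cropped to match the smaller decoder map before channel-wise concatenation, as in U-Net style skip connections.
import tensorflow as tf

skip = tf.random_uniform([2, 64, 64, 32])  # larger feature map
up = tf.random_uniform([2, 56, 56, 64])    # smaller feature map
merged = crop_and_concat(skip, up)         # runtime shape (2, 56, 56, 96)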
def resize_axis(tensor, axis, new_size, fill_value=0):
"""Truncates or pads a tensor to new_size on on a given axis.
Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
size increases, the padding will be performed at the end, using fill_value.
Args:
tensor: The tensor to be resized.
axis: An integer representing the dimension to be sliced.
new_size: An integer or 0d tensor representing the new value for
tensor.shape[axis].
fill_value: Value to use to fill any new entries in the tensor. Will be
cast to the type of tensor.
Returns:
The resized tensor.
"""
tensor = tf.convert_to_tensor(tensor)
shape = tf.unstack(tf.shape(tensor))
pad_shape = shape[:]
pad_shape[axis] = tf.maximum(0, new_size - shape[axis])
shape[axis] = tf.minimum(shape[axis], new_size)
shape = tf.stack(shape)
resized = tf.concat([
tf.slice(tensor, tf.zeros_like(shape), shape),
tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
], axis)
# Update shape.
new_shape = tensor.get_shape().as_list() # A copy is being made.
new_shape[axis] = new_size
resized.set_shape(new_shape)
return resized
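Two small examples of what resize_axis does (values invented): padding extends the axis at the end with fill_value, truncation keeps the leading entries.
import tensorflow as tf

x = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
padded = resize_axis(x, axis=1, new_size=5)   # [[1, 2, 3, 0, 0], [4, 5, 6, 0, 0]]
shorter = resize_axis(x, axis=1, new_size=2)  # [[1, 2], [4, 5]]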
def get_support(self, labels, support_type=None):
if support_type is None:
support_type = FLAGS.support_type
if "," in support_type:
new_labels = []
for st in support_type.split(","):
new_labels.append(tf.cast(self.get_support(labels, st), dtype=tf.float32))
support_labels = tf.concat(new_labels, axis=1)
return support_labels
elif support_type == "vertical":
num_classes = FLAGS.num_classes
num_verticals = FLAGS.num_verticals
vertical_file = FLAGS.vertical_file
vertical_mapping = np.zeros([num_classes, num_verticals], dtype=np.float32)
float_labels = tf.cast(labels, dtype=tf.float32)
with open(vertical_file) as F:
for line in F:
group = list(map(int, line.strip().split()))  # list() so len() works under Python 3
if len(group) == 2:
x, y = group
vertical_mapping[x, y] = 1
vm_init = tf.constant_initializer(vertical_mapping)
vm = tf.get_variable("vm", shape = [num_classes, num_verticals],
trainable=False, initializer=vm_init)
vertical_labels = tf.matmul(float_labels, vm)
return tf.cast(vertical_labels > 0.2, tf.float32)
elif support_type == "frequent":
num_frequents = FLAGS.num_frequents
frequent_labels = tf.slice(labels, begin=[0, 0], size=[-1, num_frequents])
frequent_labels = tf.cast(frequent_labels, dtype=tf.float32)
return frequent_labels
elif support_type == "label":
float_labels = tf.cast(labels, dtype=tf.float32)
return float_labels
else:
raise NotImplementedError()
def model_from_position(cls, layer_descriptions, position_tensor, input_tensor, use_softmax=False):
""" Creates TF model from the specified position and description. """
offset = 0
model = input_tensor
for i in range(1, len(layer_descriptions)):
previous_layer = layer_descriptions[i - 1]
current_layer = layer_descriptions[i]
previous_layer_size = previous_layer[0]
current_layer_size = current_layer[0]
weights_size = previous_layer_size * current_layer_size
biases_size = current_layer_size
weights = tf.slice(position_tensor, [0, offset], [1, weights_size])
weights = tf.reshape(weights, shape=[previous_layer_size, current_layer_size])
offset += weights_size
biases = tf.slice(position_tensor, [0, offset], [1, biases_size])
biases = tf.reshape(biases, shape=[1, biases_size])
offset += biases_size
model = tf.matmul(model, weights) + biases
if i != len(layer_descriptions) - 1:
model = tf.nn.relu(model)
elif use_softmax and layer_descriptions[-1][0] > 1:
model = tf.nn.softmax(model)
return model
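A hypothetical wiring of the method above (the `ParticleModel` class name and the 4-8-3 layer sizes are assumptions): all weights and biases live in one flat row vector, and tf.slice carves out each layer's block in turn.
import tensorflow as tf

layer_descriptions = [(4,), (8,), (3,)]  # (layer_size,) per layer
num_params = 4 * 8 + 8 + 8 * 3 + 3       # every weight matrix plus every bias vector
position = tf.placeholder(tf.float32, [1, num_params])
inputs = tf.placeholder(tf.float32, [None, 4])
# `ParticleModel` is a placeholder name for the class hosting the classmethod above.
logits = ParticleModel.model_from_position(layer_descriptions, position, inputs,
                                           use_softmax=True)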
def crop_and_concat(x1, x2):
x1_shape = x1.get_shape().as_list()
x2_shape = x2.get_shape().as_list()
offsets = [0, (x1_shape[1] - x2_shape[1]) // 2, (x1_shape[2] - x2_shape[2]) // 2, (x1_shape[3] - x2_shape[3]) // 2, 0]
size = [-1, x2_shape[1], x2_shape[2], x2_shape[3], -1]
x1_crop = tf.slice(x1, offsets, size)
return tf.concat([x1_crop, x2], 4)
# Some code from https://github.com/shiba24/3d-unet.git