# Common imports for the snippets below; individual snippets also rely on
# project-specific helpers (slim, snt, TensorTrainBatch, nnutil, ...).
import numpy as np
import tensorflow as tf  # the snippets target the TF 1.x API


def _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.1):
    """Append a randomly jittered copy of each ground-truth box to the rois."""
    ws = gt_boxes[:, 2] - gt_boxes[:, 0]
    hs = gt_boxes[:, 3] - gt_boxes[:, 1]
    shape = tf.shape(gt_boxes)[0]
    jitter = tf.random_uniform([shape, 1], minval=-jitter, maxval=jitter)
    jitter = tf.reshape(jitter, [-1])
    ws_offset = ws * jitter
    hs_offset = hs * jitter
    x1s = gt_boxes[:, 0] + ws_offset
    x2s = gt_boxes[:, 2] + ws_offset
    y1s = gt_boxes[:, 1] + hs_offset
    y2s = gt_boxes[:, 3] + hs_offset
    boxes = tf.concat(
        values=[
            x1s[:, tf.newaxis],
            y1s[:, tf.newaxis],
            x2s[:, tf.newaxis],
            y2s[:, tf.newaxis]],
        axis=1)
    new_scores = tf.ones([shape], tf.float32)
    new_batch_inds = tf.zeros([shape], tf.int32)
    return tf.concat(values=[rois, boxes], axis=0), \
           tf.concat(values=[scores, new_scores], axis=0), \
           tf.concat(values=[batch_inds, new_batch_inds], axis=0)
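# A minimal usage sketch with made-up tensors (TF 1.x graph mode assumed):
# one proposal roi plus one ground-truth box that receives a jittered copy.
rois = tf.constant([[10., 10., 50., 50.]])
scores = tf.constant([0.9])
batch_inds = tf.constant([0], dtype=tf.int32)
gt_boxes = tf.constant([[20., 20., 80., 80.]])
all_rois, all_scores, all_inds = _add_jittered_boxes(
    rois, scores, batch_inds, gt_boxes, jitter=0.1)   # each now has 2 rows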
def det_net_loss(seg_masks_in, reg_masks_in,
                 seg_preds, reg_preds,
                 reg_loss_weight=10.0,
                 epsilon=1e-5):
    with tf.variable_scope('loss'):
        out_size = seg_preds.get_shape()[1:3].as_list()
        # downsample the targets to the prediction resolution
        # (TF >= 0.11 takes the size as a single [height, width] argument)
        seg_masks_in_ds = tf.image.resize_images(
            seg_masks_in[:, :, :, tf.newaxis], out_size,
            tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        reg_masks_in_ds = tf.image.resize_images(
            reg_masks_in, out_size,
            tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        # segmentation loss: cross-entropy against a two-class one-hot target
        seg_masks_onehot = slim.one_hot_encoding(seg_masks_in_ds[:, :, :, 0], 2)
        seg_loss = -tf.reduce_mean(seg_masks_onehot * tf.log(seg_preds + epsilon))

        # regression loss: squared error, counted only inside the mask
        mask = tf.to_float(seg_masks_in_ds)
        reg_loss = tf.reduce_sum(mask * (reg_preds - reg_masks_in_ds) ** 2)
        reg_loss = reg_loss / (tf.reduce_sum(mask) + 1.0)

        return seg_loss + reg_loss_weight * reg_loss
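# A sketch of calling det_net_loss with dummy tensors; every shape below is
# an assumption (batch 2, 64x64 targets, 16x16 predictions, 4 regression
# channels), and slim is tf.contrib.slim as in the original codebase.
slim = tf.contrib.slim
seg_masks_in = tf.zeros([2, 64, 64], tf.int32)   # integer class ids, so the
reg_masks_in = tf.zeros([2, 64, 64, 4])          # nearest resize keeps them integral
seg_preds = tf.nn.softmax(tf.random_normal([2, 16, 16, 2]))
reg_preds = tf.random_normal([2, 16, 16, 4])
loss = det_net_loss(seg_masks_in, reg_masks_in, seg_preds, reg_preds)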
def multiply_along_batch_dim(batch_tt, weights):
    """Multiply each TensorTrain in a batch by a number.

    Args:
      batch_tt: TensorTrainBatch object, TT-matrices or TT-tensors.
      weights: 1-D tf.Tensor (or something convertible to it, e.g. np.array)
        of size batch_tt.batch_size with the weights.

    Returns:
      TensorTrainBatch
    """
    weights = tf.convert_to_tensor(weights)
    tt_cores = list(batch_tt.tt_cores)
    # broadcast the per-example weight over the core dimensions:
    # TT-matrix cores are 5-D, TT-tensor cores are 4-D
    if batch_tt.is_tt_matrix():
        weights = weights[:, tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis]
    else:
        weights = weights[:, tf.newaxis, tf.newaxis, tf.newaxis]
    tt_cores[0] = weights * tt_cores[0]
    out_shape = batch_tt.get_raw_shape()
    out_ranks = batch_tt.get_tt_ranks()
    out_batch_size = batch_tt.batch_size
    return TensorTrainBatch(tt_cores, out_shape, out_ranks, out_batch_size)
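# A usage sketch, assuming the t3f library (which defines TensorTrainBatch
# and random_tensor_batch); shapes, ranks and weights here are arbitrary.
import t3f
from t3f import TensorTrainBatch  # the class referenced above
batch = t3f.random_tensor_batch((3, 4), tt_rank=2, batch_size=5)
weights = np.arange(5, dtype=np.float32)
scaled = multiply_along_batch_dim(batch, weights)
# element i of the result equals weights[i] * (element i of the input batch)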
def _combine_box_and_delta(self, bboxes, deltas):
    """Decode regression deltas into boxes (standard inverse bbox transform)."""
    widths = bboxes[:, 2] - bboxes[:, 0] + 1.0
    heights = bboxes[:, 3] - bboxes[:, 1] + 1.0
    ctr_x = bboxes[:, 0] + 0.5 * widths
    ctr_y = bboxes[:, 1] + 0.5 * heights
    # 0::4 slicing keeps a [-1, k] matrix: one column per class when the
    # network predicts 4 deltas per class
    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]
    # shift the centre by (dx*w, dy*h) and scale the size by exp(dw), exp(dh)
    pred_ctr_x = tf.reshape(dx * widths[:, tf.newaxis] + ctr_x[:, tf.newaxis], (-1,))
    pred_ctr_y = tf.reshape(dy * heights[:, tf.newaxis] + ctr_y[:, tf.newaxis], (-1,))
    pred_w = tf.reshape(tf.exp(dw) * widths[:, tf.newaxis], (-1,))
    pred_h = tf.reshape(tf.exp(dh) * heights[:, tf.newaxis], (-1,))
    # tf.pack was renamed tf.stack in TF 1.0
    pred_boxes = tf.stack(
        [pred_ctr_x - 0.5 * pred_w,
         pred_ctr_y - 0.5 * pred_h,
         pred_ctr_x + 0.5 * pred_w,
         pred_ctr_y + 0.5 * pred_h],
        axis=1)
    return pred_boxes
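# The same decoding in plain NumPy for one hypothetical box and delta, to
# make the transform concrete.
box = np.array([10., 10., 49., 29.])            # x1, y1, x2, y2
dx, dy, dw, dh = 0.1, 0.0, np.log(2.0), 0.0
w, h = box[2] - box[0] + 1.0, box[3] - box[1] + 1.0   # 40, 20
cx, cy = box[0] + 0.5 * w, box[1] + 0.5 * h           # 30, 20
pcx, pcy = dx * w + cx, dy * h + cy                   # 34, 20
pw, ph = np.exp(dw) * w, np.exp(dh) * h               # width doubles to 80
pred = [pcx - 0.5 * pw, pcy - 0.5 * ph, pcx + 0.5 * pw, pcy + 0.5 * ph]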
def _compute_targets(self, ex_rois, gt_rois, labels):
    targets = self._bbox_transform(ex_rois, gt_rois)
    # TODO: check if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
    # tf.concat(1, ...) was the pre-1.0 argument order; axis comes second now
    return tf.concat([tf.cast(labels, dtype=tf.float32)[:, tf.newaxis], targets],
                     axis=1)
def _build(self, img, transform_params):
    # give greyscale images an explicit channel dimension before resampling
    if len(img.get_shape()) == 3:
        img = img[..., tf.newaxis]
    grid_coords = self._warper(transform_params)
    return snt.resampler(img, grid_coords)
def flip_gt_boxes(gt_boxes, ih, iw):
    """Horizontally flip (x1, y1, x2, y2, cls) boxes inside an iw-wide image."""
    x1s, y1s, x2s, y2s, cls = \
        gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4]
    x1s = tf.to_float(iw) - x1s
    x2s = tf.to_float(iw) - x2s
    # after mirroring, the old x2 becomes the new x1 and vice versa
    return tf.concat(values=(x2s[:, tf.newaxis],
                             y1s[:, tf.newaxis],
                             x1s[:, tf.newaxis],
                             y2s[:, tf.newaxis],
                             cls[:, tf.newaxis]), axis=1)
def resize_gt_boxes(gt_boxes, scale_ratio):
    xys, cls = gt_boxes[:, 0:4], gt_boxes[:, 4]
    xys = xys * scale_ratio
    return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1)
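# A quick sketch exercising both helpers on one hypothetical box
# (image height 100, width 200):
gt = tf.constant([[10., 20., 60., 80., 1.]])   # x1, y1, x2, y2, class
flipped = flip_gt_boxes(gt, 100, 200)          # -> [[140., 20., 190., 80., 1.]]
halved = resize_gt_boxes(gt, 0.5)              # -> [[5., 10., 30., 40., 1.]]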
def crop(images, boxes, batch_inds, stride=1, pooled_height=7, pooled_width=7, scope='ROIAlign'):
    """Crop areas of the feature maps into a fixed size.

    Params:
    --------
    images: a 4-D Tensor of shape (N, H, W, C)
    boxes: rois in the original image, of shape (N, ..., 4) as [x1, y1, x2, y2]
    batch_inds: 1-D int32 Tensor mapping each box to an image in the batch

    Returns:
    --------
    A Tensor of shape (N, pooled_height, pooled_width, C)
    """
    with tf.name_scope(scope):
        # map the boxes from image coordinates to feature-map coordinates
        boxes = boxes / (stride + 0.0)
        boxes = tf.reshape(boxes, [-1, 4])

        # normalize the boxes and swap the x/y dimensions, since
        # crop_and_resize expects normalized (y1, x1, y2, x2)
        shape = tf.shape(images)
        boxes = tf.reshape(boxes, [-1, 2])  # to (x, y) pairs
        xs = boxes[:, 0]
        ys = boxes[:, 1]
        xs = xs / tf.cast(shape[2], tf.float32)
        ys = ys / tf.cast(shape[1], tf.float32)
        boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
        boxes = tf.reshape(boxes, [-1, 4])  # back to (y1, x1, y2, x2)

        # if batch_inds is False:
        #     num_boxes = tf.shape(boxes)[0]
        #     batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
        # batch_inds = tf.cast(boxes[:, 0] * 0, tf.int32)
        # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
        assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
        with tf.control_dependencies([assert_op, images, batch_inds]):
            return tf.image.crop_and_resize(images, boxes, batch_inds,
                                            [pooled_height, pooled_width],
                                            method='bilinear',
                                            name='Crop')
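# Sketch: pool one roi from a random stride-16 feature map; all shapes and
# values here are made up for illustration.
feats = tf.random_normal([1, 32, 32, 256])
rois = tf.constant([[64., 64., 256., 256.]])   # box in original image coords
inds = tf.constant([0], dtype=tf.int32)
pooled = crop(feats, rois, inds, stride=16)    # -> (1, 7, 7, 256)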
def _bbox_to_mask_fixed_size(yy, region_size, output_size, dtype):
    mask = _bbox_to_mask(yy, region_size, dtype)
    # guard against empty regions, which would break the resize below
    nonzero_region = tf.greater(tf.reduce_prod(tf.shape(mask)), 0)
    mask = tf.cond(nonzero_region, lambda: mask, lambda: tf.zeros(output_size, dtype))
    mask = tf.image.resize_images(mask[..., tf.newaxis], output_size)[..., 0]
    return mask
def __init__(self, inpt, bbox0, presence0, batch_size, glimpse_size,
             feature_extractor, rnn_units, bbox_gain=-4., att_gain=-2.5,
             zoneout_prob=0., identity_init=True, attention_module=RATMAttention,
             normalize_glimpse=False, debug=False, clip_bbox=False,
             transform_init_features=False, transform_init_state=False,
             dfn_readout=False, feature_shape=None, is_training=True):
    self.inpt = inpt
    self.bbox0 = bbox0
    self.presence0 = presence0
    self.glimpse_size = glimpse_size
    self.feature_extractor = feature_extractor
    self.rnn_units = rnn_units
    self.batch_size = batch_size
    self.inpt_size = convert_shape(inpt.get_shape()[2:], np.int32)
    self.bbox_gain = ensure_array(bbox_gain, 4)[np.newaxis]
    self.att_gain = ensure_array(att_gain, attention_module.n_params)[np.newaxis]
    self.zoneout_prob = zoneout_prob
    self.identity_init = identity_init
    self.attention_module = attention_module
    self.normalize_glimpse = normalize_glimpse
    self.debug = debug
    self.clip_bbox = clip_bbox
    self.transform_init_features = transform_init_features
    self.transform_init_state = transform_init_state
    self.dfn_readout = dfn_readout
    self.feature_shape = feature_shape
    self.is_training = tf.convert_to_tensor(is_training)

    super(HierarchicalAttentiveRecurrentTracker, self).__init__(self.__class__.__name__)
    try:
        self.register(is_training)
    except ValueError:
        pass
def gaussian_mask(params, R, C):
    """Define a mask of size RxC given by one 1-D Gaussian per row.

    u, s and d (mean, std, stride) must be 1-dimensional vectors."""
    u, s, d = (params[..., i] for i in range(3))  # xrange in the Python 2 original
    for i in (u, s, d):
        assert len(i.get_shape()) == 1, i  # the original asserted on u each time
    batch_size = tf.to_int32(tf.shape(u)[0])
    R = tf.range(tf.to_int32(R))
    C = tf.range(tf.to_int32(C))
    R = tf.to_float(R)[tf.newaxis, tf.newaxis, :]
    C = tf.to_float(C)[tf.newaxis, :, tf.newaxis]
    C = tf.tile(C, (batch_size, 1, 1))
    u, d = u[:, tf.newaxis, tf.newaxis], d[:, tf.newaxis, tf.newaxis]
    s = s[:, tf.newaxis, tf.newaxis]
    # row centres: u + r*d for r = 0..R-1; every row shares the std s
    ur = u + (R - 0.) * d
    sr = tf.ones_like(ur) * s
    mask = C - ur
    mask = tf.exp(-.5 * (mask / sr) ** 2)
    # normalize each Gaussian to (approximately) sum to 1 over the columns
    mask /= tf.reduce_sum(mask, 1, keep_dims=True) + 1e-8
    return mask
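# Sketch: for a single batch element, build a mask whose three Gaussians are
# centred at u=2.0, u+d=4.0, u+2d=6.0 with std 1.0 (arbitrary values).
params = tf.constant([[2.0, 1.0, 2.0]])   # (batch, [u, s, d])
mask = gaussian_mask(params, R=3, C=8)    # -> shape (1, 8, 3)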
def extract_glimpse(inpt, attention_params, glimpse_size):
    """Extracts an attention glimpse.

    :param inpt: tensor of shape == (batch_size, img_height, img_width)
    :param attention_params: tensor of shape == (batch_size, 6), interleaved
        as [uy, ux, sy, sx, dy, dx] with u - mean, s - std, d - stride
        (this matches the 0::2 / 1::2 slicing below)
    :param glimpse_size: 2-tuple of ints as (height, width),
        size of the extracted glimpse
    :return: tensor
    """
    ap = attention_params
    shape = inpt.get_shape()
    rank = len(shape)
    assert rank in (3, 4), 'Input must be a 3 or 4 dimensional tensor'

    inpt_H, inpt_W = shape[1:3]
    if rank == 3:
        inpt = inpt[..., tf.newaxis]
        rank += 1

    # one separable 1-D Gaussian filter bank per image axis
    Fy = gaussian_mask(ap[..., 0::2], glimpse_size[0], inpt_H)
    Fx = gaussian_mask(ap[..., 1::2], glimpse_size[1], inpt_W)

    gs = []
    for channel in tf.unstack(inpt, axis=rank - 1):
        g = tf.matmul(tf.matmul(Fy, channel, adjoint_a=True), Fx)
        gs.append(g)
    g = tf.stack(gs, axis=rank - 1)

    # the original set_shape omitted the trailing channel axis; include it
    n_channels = shape[3] if len(shape) == 4 else 1
    g.set_shape([shape[0]] + list(glimpse_size) + [n_channels])
    return g
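# Sketch: a 24x24 glimpse from a batch of two single-channel 100x100 images;
# the attention parameters (means 10, stds 1, strides 3) are arbitrary and
# follow the interleaved [uy, ux, sy, sx, dy, dx] layout noted above.
imgs = tf.random_normal([2, 100, 100])
att = tf.tile(tf.constant([[10., 10., 1., 1., 3., 3.]]), [2, 1])
glimpse = extract_glimpse(imgs, att, (24, 24))   # -> (2, 24, 24, 1)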
def bbox_to_attention(self, bbox):
    with tf.variable_scope('ratm_bbox_to_attention'):
        # normalize means and stds by the input size, strides by size - 1
        us = bbox[..., :2] / self.inpt_size[np.newaxis, :2]
        ss = 0.5 * bbox[..., 2:] / self.inpt_size[np.newaxis, :2]
        ds = bbox[..., 2:] / (self.inpt_size[np.newaxis, :2] - 1.)
        att = tf.concat(axis=tf.rank(bbox) - 1, values=(us, ss, ds))
    return att
def bbox_to_attention(self, bbox):
    with tf.variable_scope('fixed_std_bbox_to_attention'):
        us = bbox[..., :2] / self.inpt_size[np.newaxis, :2]
        ds = bbox[..., 2:] / (self.inpt_size[np.newaxis, :2] - 1.)
        att = tf.concat(axis=tf.rank(bbox) - 1, values=(us, ds))
        att.set_shape(bbox.get_shape()[:-1].concatenate([4]))
    return att
def _stride_to_std(self, stride):
    """Map strides to stds with a learned 4th-order polynomial in (y, x)."""
    shape = convert_shape(stride.get_shape())
    stride_flat = tf.reshape(stride, (-1, shape[-1]))
    y, x = stride_flat[..., 0], stride_flat[..., 1]
    features = [
        tf.ones_like(y),
        y, y ** 2, y ** 3, y ** 4,
        x, x ** 2, x ** 3, x ** 4,
        y * x, y * x ** 2, y ** 2 * x,
        y * x ** 3, y ** 2 * x ** 2, y ** 3 * x
    ]
    features = tf.concat(axis=1, values=[f[..., tf.newaxis] for f in features])
    sigma_flat = tf.matmul(features, self.weights)
    return tf.reshape(sigma_flat, shape)
def _to_attention(self, raw_att, with_bias=True):
    bbox = FixedStdAttention.attention_to_bbox(self, raw_att)
    us = bbox[..., :2]
    if with_bias:
        us += self.offset_bias
    ds = bbox[..., 2:4] / (self.glimpse_size[np.newaxis, :2] - 1)
    ss = self._stride_to_std(ds)
    ap = tf.concat(axis=tf.rank(raw_att) - 1, values=(us, ss, ds), name='attention')
    ap.set_shape(raw_att.get_shape()[:-1].concatenate((6,)))
    return ap
def predictions_and_gradient(self, image, label):
    # np.newaxis adds the batch dimension expected by the images placeholder
    image = self._process_input(image)
    predictions, gradient = self._session.run(
        [self._logits, self._gradient],
        feed_dict={
            self._images: image[np.newaxis],
            self._label: label})
    gradient = self._process_gradient(gradient)
    return predictions, gradient

def gradient(self, image, label):
    image = self._process_input(image)
    g = self._session.run(
        self._gradient,
        feed_dict={
            self._images: image[np.newaxis],
            self._label: label})
    g = self._process_gradient(g)
    return g

def _loss_fn(self, image, label):
    image = self._process_input(image)
    loss = self._session.run(
        self._loss,
        feed_dict={
            self._images: image[np.newaxis],
            self._label: label})
    return loss

def backward(self, gradient, image):
    assert gradient.ndim == 1
    image = self._process_input(image)
    g = self._session.run(
        self._bw_gradient,
        feed_dict={
            self._images: image[np.newaxis],
            self._bw_gradient_pre: gradient})
    g = self._process_gradient(g)
    assert g.shape == image.shape
    return g
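# The wrappers above all rely on the same trick: the session placeholder is
# batched, so a single image gets a leading batch axis via np.newaxis.
img = np.zeros((28, 28, 3), dtype=np.float32)
batched = img[np.newaxis]                 # shape (1, 28, 28, 3)
assert batched.shape == (1,) + img.shape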
def compute_detections_greedy(seg_preds, boxes_preds, num_outputs,
                              seg_threshold=0.2,
                              sigma=5e-3, step=0.2, num_iters=20,
                              dist_threshold=20.0):
    mask_flat = tf.reshape(seg_preds[:, :, 1], [-1])
    boxes_flat = tf.reshape(boxes_preds, [-1, 4])

    # TODO: also collect (y, x) coordinates
    idxs = tf.where(mask_flat > seg_threshold)[:, 0]
    boxes = tf.gather(boxes_flat, idxs)
    boxes, confidence = refine_boxes(boxes, num_iters, step, sigma)

    num_boxes = tf.shape(boxes)[0]
    dists = tf.nn.relu(nnutil.pairwise_distance(boxes / sigma))
    weights = tf.exp(-dists)

    def _next_detection(prev, i):
        _, _, presence = prev
        # pick the box with the highest support among the still-present boxes
        confidence_curr = tf.reduce_sum(weights * presence, [1], True)
        idx = tf.to_int32(tf.argmax(confidence_curr, 0)[0])
        # suppress every box closer than dist_threshold to the chosen one
        mask = tf.to_float(tf.gather(dists, idx) > dist_threshold)[:, tf.newaxis]
        presence = presence * mask
        confidence = tf.gather(confidence_curr, idx)[0]
        return idx, confidence, presence

    idxs, confidence, presences = tf.scan(_next_detection,
                                          tf.range(0, num_outputs),
                                          initializer=(0,
                                                       0.0,
                                                       tf.ones([num_boxes, 1])))
    return tf.gather(boxes, idxs), confidence
def crop_(images, boxes, batch_inds, ih, iw, stride=1, pooled_height=7, pooled_width=7, scope='ROIAlign'):
    """Same as crop above, but also returns the normalized boxes.

    Params:
    --------
    images: a 4-D Tensor of shape (N, H, W, C)
    boxes: rois in the original image, of shape (N, ..., 4) as [x1, y1, x2, y2]
    batch_inds: 1-D int32 Tensor mapping each box to an image in the batch

    Returns:
    --------
    A list [crops, boxes]: crops of shape (N, pooled_height, pooled_width, C)
    and the normalized (y1, x1, y2, x2) boxes.
    """
    with tf.name_scope(scope):
        # map the boxes from image coordinates to feature-map coordinates
        boxes = boxes / (stride + 0.0)
        boxes = tf.reshape(boxes, [-1, 4])

        # normalize the boxes and swap the x/y dimensions (see crop above)
        shape = tf.shape(images)
        boxes = tf.reshape(boxes, [-1, 2])  # to (x, y) pairs
        xs = boxes[:, 0]
        ys = boxes[:, 1]
        xs = xs / tf.cast(shape[2], tf.float32)
        ys = ys / tf.cast(shape[1], tf.float32)
        boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
        boxes = tf.reshape(boxes, [-1, 4])  # back to (y1, x1, y2, x2)

        assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
        with tf.control_dependencies([assert_op, images, batch_inds]):
            return [tf.image.crop_and_resize(images, boxes, batch_inds,
                                             [pooled_height, pooled_width],
                                             method='bilinear',
                                             name='Crop')] + [boxes]