def _spatial_replication_padding(x, stride, output_shape, filter_shape):
    """Replication-pads `x` so a VALID convolution can produce `output_shape`."""
    _, in_height, in_width, chan = utils.tensor_shape(x)
    _, out_height, out_width, chan = output_shape
    filter_height, filter_width = filter_shape
    # Total padding needed for the strided filter to cover the requested output.
    total_padding_height = (out_height * stride + filter_height - 1) - in_height
    total_padding_width = (out_width * stride + filter_width - 1) - in_width
    # Split each total as evenly as possible between the two sides.
    padding_top = total_padding_height // 2
    padding_bottom = total_padding_height - padding_top
    padding_left = total_padding_width // 2
    padding_right = total_padding_width - padding_left
    paddings = [padding_top, padding_bottom, padding_left, padding_right]
    # tf.pad has no replication mode, but a SYMMETRIC pad of width 1 mirrors only
    # the outermost row/column, which is exactly edge replication; padding one
    # pixel at a time therefore replicates the border to any requested width.
    while max(paddings) > 0:
        new_paddings = [max(0, p - 1) for p in paddings]
        deltas = [o - n for o, n in zip(paddings, new_paddings)]
        step_paddings = [[0, 0], [deltas[0], deltas[1]], [deltas[2], deltas[3]], [0, 0]]
        x = tf.pad(x, step_paddings, mode='SYMMETRIC')
        paddings = new_paddings
    return x
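The loop above leans on a tf.pad property worth checking directly: a SYMMETRIC pad of width 1 mirrors only the outermost pixel, so repeating it replicates the edge. A minimal standalone sketch with hypothetical values:

import tensorflow as tf

row = tf.constant([[[[1.], [2.], [3.]]]])                      # shape (1, 1, 3, 1)
once = tf.pad(row, [[0, 0], [0, 0], [1, 1], [0, 0]], mode='SYMMETRIC')
twice = tf.pad(once, [[0, 0], [0, 0], [1, 1], [0, 0]], mode='SYMMETRIC')
# once  -> 1 1 2 3 3      (edge replicated one pixel deep)
# twice -> 1 1 1 2 3 3 3  (edge replicated two pixels deep)
# A single SYMMETRIC pad of width 2 would instead mirror interior pixels: 2 1 1 2 3 3 2.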
def shift(self,
model_input,
shift_width,
**unused_params):
max_frames = model_input.get_shape().as_list()[1]
num_features = model_input.get_shape().as_list()[2]
shift_inputs = []
    for i in range(shift_width):
if i == 0:
shift_inputs.append(model_input)
else:
shift_inputs.append(tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:])
shift_output = tf.concat(shift_inputs, axis=2)
return shift_output
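A quick sanity check of the pad-then-slice trick above (hypothetical tensors, standalone):

import tensorflow as tf

frames = tf.reshape(tf.range(6, dtype=tf.float32), [1, 3, 2])  # (batch, max_frames, features)
shifted = tf.pad(frames, paddings=[[0, 0], [1, 0], [0, 0]])[:, :3, :]
# 'shifted' is 'frames' delayed by one step: a zero frame enters at t=0 and the
# last frame falls off, so tf.concat(axis=2) pairs each frame with its predecessor.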
def augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    assert FLAGS.frame_feature, \
        "AugmentationTransformer only works with frame features"
feature_dim = len(model_input_raw.get_shape()) - 1
frame_dim = len(model_input_raw.get_shape()) - 2
max_frame = model_input_raw.get_shape().as_list()[frame_dim]
limit = tf.cast(tf.reduce_min(num_frames) / 4.0, tf.int32)
    offset = tf.random_uniform(shape=[], maxval=limit, dtype=tf.int32)
    input_trans1 = tf.pad(model_input_raw[:, offset:, :],
                          paddings=[[0, 0], [0, offset], [0, 0]])
num_frames_trans1 = num_frames - offset
num_frames_trans1 = tf.cast(
tf.random_uniform(shape=num_frames.shape, minval=0.75, maxval=1.0,
dtype=tf.float32)
* num_frames_trans1, tf.int32)
model_input = tf.concat([model_input_raw, input_trans1], axis=0)
labels_batch = tf.concat([labels_batch, labels_batch], axis=0)
num_frames = tf.concat([num_frames, num_frames_trans1], axis=0)
    return model_input, labels_batch, num_frames
def get_default_config(self):
config = BasicModel.get_default_config(self)
model_config = {
'stride': 1,
'inception_v4_checkpoint_file': os.path.join(script_dir, '..',
'data', 'inception_v4.ckpt'),
'batch_norm_decay': 0.99,
'batch_norm_epsilon': 0.001,
'output_size': 29,
'pad': 32,
'receptive_field_size': 66,
'projective_field_size': 7,
'contextual_pad': 32,
'normalize_inputs': False,
'batch_size': 64,
}
config.update(model_config)
return config
def apply_shortcut(self, prev_inp, ch_in, ch_out, phase_train=None, w=None,
bn=None, stride=None):
if self.shortcut == 'projection':
if self.dilation:
prev_inp = DilatedConv2D(w, rate=stride)(prev_inp)
else:
prev_inp = Conv2D(w, stride=stride)(prev_inp)
prev_inp = bn({'input': prev_inp, 'phase_train': phase_train})
elif self.shortcut == 'identity':
pad_ch = ch_out - ch_in
if pad_ch < 0:
raise Exception('Must use projection when ch_in > ch_out.')
prev_inp = tf.pad(prev_inp, [[0, 0], [0, 0], [0, 0], [0, pad_ch]])
if stride > 1:
prev_inp = AvgPool(stride)(prev_inp)
    else:
        raise Exception('Unknown shortcut type: {}'.format(self.shortcut))
self.log.info('After proj shape: {}'.format(
prev_inp.get_shape()))
return prev_inp
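The identity branch implements the zero-padding shortcut ("option A" in the ResNet paper): extra channels are appended as zeros instead of learned by a projection. A minimal sketch of just that pad:

import tensorflow as tf

x = tf.zeros([1, 8, 8, 16])
y = tf.pad(x, [[0, 0], [0, 0], [0, 0], [0, 16]])   # 16 -> 32 channels, zeros appended
assert y.get_shape().as_list() == [1, 8, 8, 32]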
def apply_shortcut(self, prev_inp, ch_in, ch_out, phase_train=None, w=None, stride=None):
if self.shortcut == 'projection':
if self.dilation:
prev_inp = DilatedConv2D(w, rate=stride)(prev_inp)
else:
prev_inp = Conv2D(w, stride=stride)(prev_inp)
bn = BatchNorm(ch_out)
prev_inp = bn({'input': prev_inp, 'phase_train': phase_train})
elif self.shortcut == 'identity':
pad_ch = ch_out - ch_in
if pad_ch < 0:
raise Exception('Must use projection when ch_in > ch_out.')
prev_inp = tf.pad(prev_inp, [[0, 0], [0, 0], [0, 0], [0, pad_ch]])
if stride > 1:
prev_inp = AvgPool(stride)(prev_inp)
bn = None
self.log.info('After proj shape: {}'.format(
prev_inp.get_shape()))
return prev_inp, bn
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
'''Pads the 2nd and 3rd dimensions of a 4D tensor
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
'''
if dim_ordering == 'default':
dim_ordering = image_dim_ordering()
if dim_ordering not in {'th', 'tf'}:
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
if dim_ordering == 'th':
pattern = [[0, 0], [0, 0],
[padding[0], padding[0]], [padding[1], padding[1]]]
else:
pattern = [[0, 0],
[padding[0], padding[0]], [padding[1], padding[1]],
[0, 0]]
return tf.pad(x, pattern)
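For example, in the 'tf' ordering a padding of (1, 2) grows a (batch, H, W, C) tensor by 2 rows and 4 columns; a standalone check of the pattern the function builds:

import tensorflow as tf

x = tf.zeros([4, 8, 8, 3])
y = tf.pad(x, [[0, 0], [1, 1], [2, 2], [0, 0]])    # the 'tf' branch with padding=(1, 2)
assert y.get_shape().as_list() == [4, 10, 12, 3]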
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1,
left_pad=1, right_pad=1,
dim_ordering='default'):
'''Pad the rows and columns of a 4D tensor
with "top_pad", "bottom_pad", "left_pad", "right_pad" (resp.) zeros
rows on top, bottom; cols on left, right.
'''
if dim_ordering == 'default':
dim_ordering = image_dim_ordering()
if dim_ordering not in {'th', 'tf'}:
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
if dim_ordering == 'th':
pattern = [[0, 0],
[0, 0],
[top_pad, bottom_pad],
[left_pad, right_pad]]
else:
pattern = [[0, 0],
[top_pad, bottom_pad],
[left_pad, right_pad],
[0, 0]]
return tf.pad(x, pattern)
def apply_time_pooling(inputs, sequence_length, stride, pooling_avg=False):
shape = [tf.shape(inputs)[0], tf.shape(inputs)[1], inputs.get_shape()[2].value]
if pooling_avg:
inputs_ = [inputs[:, i::stride, :] for i in range(stride)]
max_len = tf.shape(inputs_[0])[1]
for k in range(1, stride):
len_ = tf.shape(inputs_[k])[1]
paddings = tf.stack([[0, 0], [0, max_len - len_], [0, 0]])
inputs_[k] = tf.pad(inputs_[k], paddings=paddings)
inputs = tf.reduce_sum(inputs_, axis=0) / len(inputs_)
else:
inputs = inputs[:, ::stride, :]
inputs = tf.reshape(inputs, tf.stack([shape[0], tf.shape(inputs)[1], shape[2]]))
sequence_length = (sequence_length + stride - 1) // stride # rounding up
return inputs, sequence_length
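A usage sketch, assuming the function above is in scope (hypothetical shapes):

import tensorflow as tf

x = tf.zeros([2, 7, 16])                 # (batch, time, features)
lengths = tf.constant([7, 5])
pooled, pooled_lengths = apply_time_pooling(x, lengths, stride=2)
# The time axis shrinks to ceil(7/2) = 4 frames; pooled_lengths becomes [4, 3]
# thanks to the rounding-up division on sequence_length.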
def zoomout(image, gt_bboxes, params):
X_out = tf.random_uniform([], 1.05, params['X_out'])
h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
zoomout_color = params['zoomout_color']+[0]
bg_color = tf.constant(zoomout_color, dtype=tf.float32)
x_shift = tf.random_uniform([], 0, (X_out - 1) * w)
y_shift = tf.random_uniform([], 0, (X_out - 1) * h)
x2_shift = (X_out - 1) * w - x_shift
y2_shift = (X_out - 1) * h - y_shift
# somewhat hacky solution to pad with MEAN_COLOR
# tf.pad does not support custom constant padding unlike numpy
image -= bg_color
image = tf.pad(image, tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]]))
image += bg_color
gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
gt_bboxes = tf.stack([gt_x + x_shift/w,
gt_y + y_shift/h,
gt_w, gt_h], axis=1)/X_out
return image, gt_bboxes
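The subtract/pad/add trick above is worth noting: tf.pad fills with zeros (newer releases accept a scalar constant_values, but not a per-channel color), so shifting by the background color first makes the zero padding come out as that color. A standalone sketch:

import tensorflow as tf

img = tf.fill([2, 2, 3], 0.5)
color = tf.constant([0.3, 0.3, 0.3])
padded = tf.pad(img - color, [[1, 1], [1, 1], [0, 0]]) + color
# Every border pixel equals 'color'; the interior is restored to exactly 0.5.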
def forward(self):
    pad = [[self.lay.pad, self.lay.pad]] * 2
temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]])
k = self.lay.w['kernels']
ksz = self.lay.ksize
half = int(ksz / 2)
out = list()
for i in range(self.lay.h_out):
row_i = list()
for j in range(self.lay.w_out):
kij = k[i * self.lay.w_out + j]
i_, j_ = i + 1 - half, j + 1 - half
tij = temp[:, i_ : i_ + ksz, j_ : j_ + ksz,:]
row_i.append(
tf.nn.conv2d(tij, kij,
padding = 'VALID',
strides = [1] * 4))
out += [tf.concat(row_i, 2)]
self.out = tf.concat(out, 1)
def constrained_conv2d(input_, output_dim,
k_h=6, k_w=6, d_h=2, d_w=2, stddev=0.02,
name="conv2d"):
assert k_h % d_h == 0
assert k_w % d_w == 0
# constrained to have stride be a factor of kernel width
# this is intended to reduce convolution artifacts
with tf.variable_scope(name):
w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
initializer=tf.truncated_normal_initializer(stddev=stddev))
# This is meant to reduce boundary artifacts
padded = tf.pad(input_, [[0, 0],
[k_h-1, 0],
[k_w-1, 0],
[0, 0]])
        conv = tf.nn.conv2d(padded, w, strides=[1, d_h, d_w, 1], padding='VALID')
biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
conv = tf.nn.bias_add(conv, biases)
return conv
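With the (k-1)-pixel top/left padding and a VALID convolution, the output is floor((in - 1)/stride) + 1 = ceil(in/stride) pixels per side, the same size SAME padding gives, but each output sees only pixels above and to its left. A hypothetical shape check, assuming the fixed function above is in scope:

import tensorflow as tf

x = tf.zeros([1, 32, 32, 3])
y = constrained_conv2d(x, 16)            # k=6, stride=2 -> ceil(32/2) = 16 per side
assert y.get_shape().as_list() == [1, 16, 16, 16]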
def conv2d(batch_input, out_channels, filter_shape, strides, name="conv"):
with tf.variable_scope(name):
in_channels = batch_input.get_shape()[1]
in_height = batch_input.get_shape()[2]
in_width = batch_input.get_shape()[3]
kh, kw = filter_shape
_, _, sh, sw = strides
w = tf.get_variable(name="w",
shape=[kh, kw, in_channels, out_channels],
dtype=tf.float32,
initializer=tf.random_normal_initializer(0, 0.02))
# b = tf.get_variable(name='b',
# shape=[out_channels],
# initializer=tf.constant_initializer(0.0))
ph = pad_numbers(int(in_height), kh, sh)
pw = pad_numbers(int(in_width), kw, sw)
padded_input = tf.pad(batch_input, [[0, 0], [0, 0], ph, pw], mode="REFLECT")
# conv = tf.nn.bias_add(tf.nn.conv2d(padded_input, w, strides, padding="VALID", data_format="NCHW"), b, data_format="NCHW")
conv = tf.nn.conv2d(padded_input, w, strides, padding="VALID", data_format="NCHW")
return conv
def encoder(self, x):
with tf.variable_scope('encoder'):
net = resnet_utils.conv2d_same(x, 64, 7, stride=2, scope='conv1')
net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
x = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')
x_features_all, _ = resnet_v1.resnet_v1(x,
self._blocks_encoder,
global_pool=False,
include_root_block=False,
scope=self._resnet_scope)
x_features_all = tf.reduce_mean(x_features_all, axis=[1, 2])
x_features_labeled, x_features_unlabeled = tf.split(x_features_all, 2)
x_features_tiled = tf.tile(x_features_unlabeled, [self._num_classes, 1]) # (100, 256) --> (2100, 256)
x_features = tf.concat([x_features_labeled, x_features_tiled], 0) # (2100, 256) --> (2200, 256)
return x_features
def conv2d(input, num_filters, filter_size, stride, reuse=False,
pad='SAME', dtype=tf.float32, bias=False):
stride_shape = [1, stride, stride, 1]
filter_shape = [filter_size, filter_size, input.get_shape()[3], num_filters]
w = tf.get_variable('w', filter_shape, dtype, tf.random_normal_initializer(0.0, 0.02))
if pad == 'REFLECT':
p = (filter_size - 1) // 2
x = tf.pad(input, [[0,0],[p,p],[p,p],[0,0]], 'REFLECT')
conv = tf.nn.conv2d(x, w, stride_shape, padding='VALID')
else:
assert pad in ['SAME', 'VALID']
conv = tf.nn.conv2d(input, w, stride_shape, padding=pad)
if bias:
b = tf.get_variable('b', [1,1,1,num_filters], initializer=tf.constant_initializer(0.0))
conv = conv + b
return conv
def pad2d(inputs,
pad=(0, 0),
mode='CONSTANT',
data_format='NHWC',
trainable=True,
scope=None):
"""2D Padding layer, adding a symmetric padding to H and W dimensions.
Aims to mimic padding in Caffe and MXNet, helping the port of models to
TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.
Args:
inputs: 4D input Tensor;
pad: 2-Tuple with padding values for H and W dimensions;
mode: Padding mode. C.f. `tf.pad`
data_format: NHWC or NCHW data format.
"""
with tf.name_scope(scope, 'pad2d', [inputs]):
# Padding shape.
if data_format == 'NHWC':
paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
        elif data_format == 'NCHW':
            paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]]
        else:
            raise ValueError('Unknown data_format: ' + str(data_format))
net = tf.pad(inputs, paddings, mode=mode)
return net
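Typical use when porting a Caffe model: pad H and W by 1 before a 3x3 VALID convolution so the output keeps the input's spatial size. Assuming the function above is in scope:

import tensorflow as tf

x = tf.zeros([1, 8, 8, 16])
y = pad2d(x, pad=(1, 1))                 # NHWC -> (1, 10, 10, 16)
assert y.get_shape().as_list() == [1, 10, 10, 16]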
def _conv(self, inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv'):
""" Spatial Convolution (CONV2D)
Args:
inputs : Input Tensor (Data Type : NHWC)
filters : Number of filters (channels)
kernel_size : Size of kernel
strides : Stride
pad : Padding Type (VALID/SAME) # DO NOT USE 'SAME' NETWORK BUILT FOR VALID
name : Name of the block
Returns:
conv : Output Tensor (Convolved Input)
"""
with tf.name_scope(name):
# Kernel for convolution, Xavier Initialisation
kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size,kernel_size, inputs.get_shape().as_list()[3], filters]), name= 'weights')
conv = tf.nn.conv2d(inputs, kernel, [1,strides,strides,1], padding=pad, data_format='NHWC')
if self.w_summary:
with tf.device('/cpu:0'):
tf.summary.histogram('weights_summary', kernel, collections = ['weight'])
return conv
def _conv_bn_relu(self, inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv_bn_relu'):
""" Spatial Convolution (CONV2D) + BatchNormalization + ReLU Activation
Args:
inputs : Input Tensor (Data Type : NHWC)
filters : Number of filters (channels)
kernel_size : Size of kernel
strides : Stride
pad : Padding Type (VALID/SAME) # DO NOT USE 'SAME' NETWORK BUILT FOR VALID
name : Name of the block
Returns:
norm : Output Tensor
"""
with tf.name_scope(name):
kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size,kernel_size, inputs.get_shape().as_list()[3], filters]), name= 'weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1], padding=pad, data_format='NHWC')
norm = tf.contrib.layers.batch_norm(conv, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, is_training = self.training)
if self.w_summary:
with tf.device('/cpu:0'):
tf.summary.histogram('weights_summary', kernel, collections = ['weight'])
return norm
def _attention_iter(self, inputs, lrnSize, itersize, name = 'attention_iter'):
with tf.name_scope(name):
numIn = inputs.get_shape().as_list()[3]
        padding = int(np.floor(lrnSize / 2))
pad = tf.pad(inputs, np.array([[0,0],[1,1],[1,1],[0,0]]))
U = self._conv(pad, filters=1, kernel_size=3, strides=1)
pad_2 = tf.pad(U, np.array([[0,0],[padding,padding],[padding,padding],[0,0]]))
sharedK = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([lrnSize,lrnSize, 1, 1]), name= 'shared_weights')
Q = []
C = []
for i in range(itersize):
if i ==0:
conv = tf.nn.conv2d(pad_2, sharedK, [1,1,1,1], padding='VALID', data_format='NHWC')
else:
conv = tf.nn.conv2d(Q[i-1], sharedK, [1,1,1,1], padding='SAME', data_format='NHWC')
C.append(conv)
Q_tmp = tf.nn.sigmoid(tf.add_n([C[i], U]))
Q.append(Q_tmp)
stacks = []
for i in range(numIn):
stacks.append(Q[-1])
pfeat = tf.multiply(inputs,tf.concat(stacks, axis = 3) )
return pfeat
def conv2d(inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = None):
"""
Create a Convolutional Layer
args :
inputs : (tensor) input Tensor
filters : (int) number of filters
kernel_size : (int) size of the kernel
strides : (int) Value of stride
pad : ('VALID'/'SAME')
return :
tf.Tensor
"""
with tf.name_scope(name):
kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size,kernel_size, inputs.get_shape().as_list()[3], filters]), name= 'weights')
conv = tf.nn.conv2d(inputs, kernel, [1,strides,strides,1], padding=pad, data_format='NHWC')
with tf.device('/cpu:0'):
tf.summary.histogram('weights_summary', kernel, collections = ['train'])
return conv
def convBnrelu(inputs, filters, kernel_size = 1, strides = 1, name = None):
"""
Create a Convolutional Layer + Batch Normalization + ReLU Activation
args :
inputs : (tf.Tensor) input Tensor
filters : (int) number of filters
kernel_size : (int) size of the kernel
strides : (int) Value of stride
return :
tf.Tensor
"""
with tf.name_scope(name):
kernel = tf.Variable(tf.contrib.layers.xavier_initializer(uniform=False)([kernel_size,kernel_size, inputs.get_shape().as_list()[3], filters]), name= 'weights')
conv = tf.nn.conv2d(inputs, kernel, [1,strides,strides,1], padding='VALID', data_format='NHWC')
norm = tf.contrib.layers.batch_norm(conv, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, scope = '_bn_relu')
with tf.device('/cpu:0'):
tf.summary.histogram('weights_summary', kernel, collections = ['train'])
return norm
def convBlock(inputs, numOut, name = 'convBlock'):
"""
Create a Convolutional Block Layer for Residual Units
args:
inputs : (tf.Tensor)
numOut : (int) number of output channels
return :
tf.Tensor
"""
# DIMENSION CONSERVED
with tf.name_scope(name):
norm_1 = tf.contrib.layers.batch_norm(inputs, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
conv_1 = conv2d(norm_1, int(numOut/2), kernel_size=1, strides=1, pad = 'VALID')
norm_2 = tf.contrib.layers.batch_norm(conv_1, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
pad = tf.pad(norm_2, np.array([[0,0],[1,1],[1,1],[0,0]]))
conv_2 = conv2d(pad, int(numOut/2), kernel_size=3, strides=1, pad = 'VALID')
norm_3 = tf.contrib.layers.batch_norm(conv_2, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
conv_3 = conv2d(norm_3, int(numOut), kernel_size=1, strides=1, pad = 'VALID')
return conv_3
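The block is a pre-activation bottleneck: BN-ReLU-1x1 (halve channels), BN-ReLU-3x3 (with a 1-pixel pad so H and W survive the VALID conv), BN-ReLU-1x1 (restore channels). A hypothetical call, assuming conv2d above is in scope:

import tensorflow as tf
import numpy as np

x = tf.zeros([1, 64, 64, 256])
out = convBlock(x, 256)                  # spatial size preserved: (1, 64, 64, 256)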
def _build_residual_layer(self, name, inputs, k, rfsize, blocksize=2, stride=1): # rfsize: receptive field size
layer = dict()
with tf.variable_scope(name):
with tf.variable_scope('layer1'):
layer['filters1'] = tf.get_variable('filters1', [rfsize, rfsize, get_shape(inputs)[-1], k])
layer['conv1'] = tf.nn.conv2d(tf.pad(inputs, [[0, 0], [1, 1], [1, 1], [0, 0]], 'REFLECT'), layer['filters1'], strides=[1, stride, stride, 1], padding='VALID')
layer['bn1'] = inst_norm(layer['conv1'])
layer['fmap1'] = tf.nn.relu(layer['bn1'])
with tf.variable_scope('layer2'):
            layer['filters2'] = tf.get_variable('filters2', [rfsize, rfsize, k, k])
layer['conv2'] = tf.nn.conv2d(tf.pad(layer['fmap1'], [[0, 0], [1, 1], [1, 1], [0, 0]], 'REFLECT'), layer['filters2'], strides=[1, stride, stride, 1], padding='VALID')
layer['bn2'] = inst_norm(layer['conv2'])
# No ReLu here (following http://torch.ch/blog/2016/02/04/resnets.html, as indicated by the authors)
layer['fmap2'] = layer['bn2'] + inputs
return layer
def get_parameters():
params = tf.contrib.training.HParams(
# vocabulary
pad="</s>",
unk="UNK",
eos="</s>",
bos="</s>",
append_eos=False,
# model
rnn_cell="LegacyGRUCell",
embedding_size=620,
hidden_size=1000,
maxnum=2,
# regularization
dropout=0.2,
use_variational_dropout=False,
label_smoothing=0.1,
constant_batch_size=True,
batch_size=128,
max_length=60,
clip_grad_norm=5.0
)
return params
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
if stride == 1:
return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
padding='SAME', scope=scope)
    else:  # with stride > 1, zero-pad explicitly: kernel_size - 1 pixels in total
# kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
pad_total = kernel_size - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
        inputs = tf.pad(inputs,  # pad height and width symmetrically before the VALID conv
                        [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
padding='VALID', scope=scope)
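The point of padding by hand: with stride > 1, padding='SAME' derives the begin/end split from the runtime input size, so feature layouts can shift by a pixel between input sizes; fixed explicit padding removes that dependence. A hypothetical shape check, assuming slim = tf.contrib.slim and the function above in scope:

import tensorflow as tf
slim = tf.contrib.slim

x = tf.zeros([1, 224, 224, 3])
y = conv2d_same(x, 64, kernel_size=7, stride=2, scope='conv1')
# padded to 230, then VALID 7x7 stride 2 -> floor((230 - 7)/2) + 1 = 112 per side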
######## Blocks ########
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
"""Pads the 2nd and 3rd dimensions of a 4D tensor
with "padding[0]" and "padding[1]" (resp.) zeros left and right.
# Returns
A padded 4D tensor.
# Raises
        ValueError: if `dim_ordering` is neither `tf` nor `th`.
"""
if dim_ordering == 'default':
dim_ordering = image_dim_ordering()
if dim_ordering not in {'th', 'tf'}:
raise ValueError('Unknown dim_ordering ' + str(dim_ordering))
if dim_ordering == 'th':
pattern = [[0, 0], [0, 0],
[padding[0], padding[0]], [padding[1], padding[1]]]
else:
pattern = [[0, 0],
[padding[0], padding[0]], [padding[1], padding[1]],
[0, 0]]
return tf.pad(x, pattern)
def _bbox_to_mask(yy, region_size, dtype):
    # trim bounding box exceeding region_size on top and left
neg_part = tf.nn.relu(-yy[:2])
core = tf.ones(tf.to_int32(tf.round(yy[2:] - neg_part)), dtype=dtype)
y1 = tf.maximum(yy[0], 0.)
x1 = tf.maximum(yy[1], 0.)
y2 = tf.minimum(region_size[0], yy[0] + yy[2])
x2 = tf.minimum(region_size[1], yy[1] + yy[3])
padding = (y1, region_size[0] - y2, x1, region_size[1] - x2)
padding = tf.reshape(tf.stack(padding), (-1, 2))
padding = tf.to_int32(tf.round(padding))
mask = tf.pad(core, padding)
    # trim bounding box exceeding region_size on bottom and right
rs = tf.to_int32(tf.round(region_size))
mask = mask[:rs[0], :rs[1]]
mask.set_shape((None, None))
return mask
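A minimal sketch of the geometry (hypothetical values): a 2x2 box at (y=1, x=1) inside a 4x4 region becomes a binary mask via one tf.pad around the all-ones core, assuming the function above is in scope:

import tensorflow as tf

bbox = tf.constant([1., 1., 2., 2.])     # (y, x, height, width)
mask = _bbox_to_mask(bbox, tf.constant([4., 4.]), tf.float32)
# mask == [[0, 0, 0, 0],
#          [0, 1, 1, 0],
#          [0, 1, 1, 0],
#          [0, 0, 0, 0]]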
def image_series_summary(tag, imgs, max_timesteps=10):
# take only 3 items from the minibatch
imgs = imgs[:, :3]
# assume img.shape == (T, batch_size, n_obj, H, W, C)
# let's log only for 1st obj
    imgs = tf.cond(tf.equal(tf.rank(imgs), 6), lambda: imgs[:, :, 0], lambda: imgs)
shape = (max_timesteps,) + tuple(imgs.get_shape()[1:])
nt = tf.shape(imgs)[0]
def pad():
paddings = tf.concat(axis=0, values=([[0, max_timesteps - nt]], tf.zeros((len(shape) - 1, 2), tf.int32)))
return tf.pad(imgs, paddings)
imgs = tf.cond(tf.greater(nt, max_timesteps), lambda: imgs[:max_timesteps], pad)
imgs.set_shape(shape)
imgs = tf.squeeze(imgs)
imgs = tf.unstack(imgs)
# concatenate along the columns
imgs = tf.concat(axis=2, values=imgs)
tf.summary.image(tag, imgs)