def _crop_pool_layer(self, bottom, rois, name):
    with tf.variable_scope(name) as scope:
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of bboxes
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Gradients won't be back-propagated to the rois anyway; stop_gradient just saves time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))
        if cfg.RESNET.MAX_POOL:
            pre_pool_size = cfg.POOLING_SIZE * 2
            crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                             [pre_pool_size, pre_pool_size], name="crops")
            crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
        else:
            crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                             [cfg.POOLING_SIZE, cfg.POOLING_SIZE], name="crops")
    return crops

# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes it pads by 0, sometimes by 1
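A minimal sketch of what that comment is warning about, assuming TF 1.x with contrib.slim: for a 3x3 pool with stride 2, 'SAME' padding pads 1 pixel on each side of an odd-sized input but 0 before and 1 after for an even-sized one, which is why these snippets pad explicitly and pool with 'VALID'.

import tensorflow as tf
import tensorflow.contrib.slim as slim

x_odd = tf.zeros([1, 7, 7, 1])   # 'SAME' pads 1 pixel on each side
x_even = tf.zeros([1, 8, 8, 1])  # 'SAME' pads 0 before and 1 after
print(slim.max_pool2d(x_odd, [3, 3], stride=2, padding='SAME').shape)   # (1, 4, 4, 1)
print(slim.max_pool2d(x_even, [3, 3], stride=2, padding='SAME').shape)  # (1, 4, 4, 1)

# Explicit symmetric padding followed by 'VALID' pooling, as in the snippets here:
x_pad = tf.pad(x_odd, [[0, 0], [1, 1], [1, 1], [0, 0]])
print(slim.max_pool2d(x_pad, [3, 3], stride=2, padding='VALID').shape)  # (1, 4, 4, 1)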
def _crop_pool_layer(self, bottom, rois, name):
    with tf.variable_scope(name) as scope:
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of bounding boxes
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Gradients won't be back-propagated to the rois anyway; stop_gradient just saves time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
        pre_pool_size = cfg.POOLING_SIZE * 2
        crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                         [pre_pool_size, pre_pool_size], name="crops")
    return slim.max_pool2d(crops, [2, 2], padding='SAME')
def vgg_arg_scope(weight_decay=0.0005):
    """Defines the VGG arg scope.

    Args:
        weight_decay: The l2 regularization coefficient.

    Returns:
        An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d], padding='SAME'):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
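A hedged usage sketch for an arg scope like the one above (the placeholder and layer names are illustrative, not from the original project):

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(vgg_arg_scope(weight_decay=0.0005)):
    # conv2d now defaults to ReLU, L2 regularization, and 'SAME' padding,
    # and max_pool2d defaults to 'SAME' padding.
    net = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')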
def _extra_conv_arg_scope_with_bn(weight_decay=0.00001,
                                  activation_fn=None,
                                  batch_norm_decay=0.997,
                                  batch_norm_epsilon=1e-5,
                                  batch_norm_scale=True):
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
def encoder(self, x):
    with tf.variable_scope('encoder'):
        net = resnet_utils.conv2d_same(x, 64, 7, stride=2, scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        x = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')
        x_features_all, _ = resnet_v1.resnet_v1(x,
                                                self._blocks_encoder,
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=self._resnet_scope)
        x_features_all = tf.reduce_mean(x_features_all, axis=[1, 2])
        x_features_labeled, x_features_unlabeled = tf.split(x_features_all, 2)
        x_features_tiled = tf.tile(x_features_unlabeled, [self._num_classes, 1])  # (100, 256) --> (2100, 256)
        x_features = tf.concat([x_features_labeled, x_features_tiled], 0)  # (2100, 256) --> (2200, 256)
    return x_features
def squeezenet(inputs,
               num_classes=1000,
               is_training=True,
               keep_prob=0.5,
               spatial_squeeze=True,
               scope='squeeze'):
    """SqueezeNet v1.1 inference graph."""
    with tf.name_scope(scope, 'squeeze', [inputs]) as sc:
        end_points_collection = sc + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d,
                             slim.avg_pool2d, fire_module],
                            outputs_collections=end_points_collection):
            nets = squeezenet_inference(inputs, is_training, keep_prob)
            nets = slim.conv2d(nets, num_classes, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='logits')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeeze:
                nets = tf.squeeze(nets, [1, 2], name='logits/squeezed')
    return nets, end_points
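fire_module and squeezenet_inference are defined elsewhere in that project. For reference, a SqueezeNet fire module conventionally squeezes with a 1x1 convolution and expands with parallel 1x1 and 3x3 convolutions; a sketch under that assumption (not the project's exact code), decorated so it can participate in the arg_scope above:

@slim.add_arg_scope
def fire_module(inputs, squeeze_depth, expand_depth,
                outputs_collections=None, scope=None):
    with tf.variable_scope(scope, 'fire', [inputs]) as sc:
        net = slim.conv2d(inputs, squeeze_depth, [1, 1], scope='squeeze')
        e1x1 = slim.conv2d(net, expand_depth, [1, 1], scope='e1x1')
        e3x3 = slim.conv2d(net, expand_depth, [3, 3], scope='e3x3')
        net = tf.concat([e1x1, e3x3], 3)
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, net)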
def densenet_inference(inputs, is_training, keep_prob, growth_rate, reduction):
    first_output_fea = growth_rate * 2
    nets = slim.conv2d(inputs, first_output_fea,
                       [5, 5], scope='conv0')
    nets = slim.max_pool2d(nets, [3, 3], padding='SAME', scope='pool0')  # 56*48*64
    nets = densenet_block(nets, 6, growth_rate, True,
                          'block1', is_training, keep_prob)
    nets = transition_block(nets, reduction, 'trans1', is_training, keep_prob)  # 28*24*256
    nets = densenet_block(nets, 12, growth_rate, True,
                          'block2', is_training, keep_prob)
    nets = transition_block(nets, reduction, 'trans2', is_training, keep_prob)  # 14*12*640
    nets = densenet_block(nets, 24, growth_rate, True,
                          'block3', is_training, keep_prob)
    nets = transition_block(nets, reduction, 'trans3', is_training, keep_prob)  # 7*6*1408
    nets = densenet_block(nets, 16, growth_rate, True,
                          'block4', is_training, keep_prob)  # 7*6*1920
    nets = slim.avg_pool2d(nets, [7, 6], scope='pool4')  # 1*1*1920
    return nets
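densenet_block and transition_block also live elsewhere in that project. A DenseNet transition layer is conventionally a 1x1 compression convolution followed by 2x2 average pooling; a sketch under that assumption (the dropout placement is a guess based on the keep_prob argument, not the project's exact code):

def transition_block(nets, reduction, scope, is_training, keep_prob):
    # Compress channels by the `reduction` factor, then halve the spatial size.
    num_outputs = int(int(nets.get_shape()[-1]) * reduction)
    with tf.variable_scope(scope):
        nets = slim.batch_norm(nets, is_training=is_training)
        nets = slim.conv2d(nets, num_outputs, [1, 1], activation_fn=None)
        nets = slim.dropout(nets, keep_prob, is_training=is_training)
        nets = slim.avg_pool2d(nets, [2, 2], stride=2)
    return nets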
def _root_block(input,
                initial_conv_filters,
                weight_decay=5e-4,
                ksize=(7, 7),
                is_pool=True):
    """Adds the initial conv block, with batch norm and relu, for the DPN.

    Args:
        input: input tensor
        initial_conv_filters: number of filters for the initial conv block
        weight_decay: weight decay factor

    Returns:
        A tensor.
    """
    x = slim.conv2d(input,
                    initial_conv_filters,
                    ksize,
                    padding='SAME',
                    stride=(1, 1),
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=None)
    x = slim.batch_norm(x)
    x = tf.nn.relu(x)
    if is_pool:
        x = slim.max_pool2d(x, (3, 3), stride=(2, 2), padding='SAME')
    return x
def reduction_a(net, k, l, m, n):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
                                    stride=2, padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
    return net
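All three branches downsample with 'VALID' 3x3/stride-2 ops, so their spatial sizes match and only the depths add: the output has n + m + (input depth) channels. A quick shape check with the canonical Inception-ResNet sizes (the numbers are illustrative):

net_in = tf.zeros([1, 35, 35, 256])
net_out = reduction_a(net_in, k=192, l=192, m=256, n=384)
print(net_out.shape)  # (1, 17, 17, 896): 384 (conv) + 256 (m) + 256 (pooled input)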
def reduction_b(net):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                   padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_3'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv_1, tower_conv1_1,
                     tower_conv2_2, tower_pool], 3)
    return net
def _extra_conv_arg_scope_with_bn(weight_decay=0.00001,
                                  activation_fn=None,
                                  batch_norm_decay=0.997,
                                  batch_norm_epsilon=1e-5,
                                  batch_norm_scale=True):
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        # UPDATE_OPS_EXTRA is not a stock tf.GraphKeys member; the project
        # presumably registers this extra collection key elsewhere.
        'updates_collections': tf.GraphKeys.UPDATE_OPS_EXTRA,
    }
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
def subsample(inputs, factor, scope=None):
    """Subsamples the input along the spatial dimensions.

    Args:
        inputs: A `Tensor` of size [batch, height_in, width_in, channels].
        factor: The subsampling factor.
        scope: Optional variable_scope.

    Returns:
        output: A `Tensor` of size [batch, height_out, width_out, channels] with the
            input, either intact (if factor == 1) or subsampled (if factor > 1).
    """
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
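Since the pooling window is [1, 1], this is just a strided identity that keeps every `factor`-th pixel; a minimal check (assuming TF 1.x):

x = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
y = subsample(x, factor=2)
with tf.Session() as sess:
    print(sess.run(y)[0, :, :, 0])
    # [[ 0.  2.]
    #  [ 8. 10.]]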
def _image_to_head(self, is_training, reuse=None):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def create_network(self, input, trainable):
    if trainable:
        wr = slim.l2_regularizer(self.regularization)
    else:
        wr = None
    # The input is a stack of grayscale frames;
    # move the stack into the channel dimension (last in TF's NHWC layout).
    input_t = tf.transpose(input, [0, 2, 3, 1])
    net = slim.conv2d(input_t, 8, (7, 7), data_format="NHWC",
                      activation_fn=tf.nn.relu, stride=3, weights_regularizer=wr,
                      trainable=trainable)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.conv2d(net, 16, (3, 3), data_format="NHWC",
                      activation_fn=tf.nn.relu, weights_regularizer=wr,
                      trainable=trainable)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.flatten(net)
    net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu,
                               weights_regularizer=wr, trainable=trainable)
    q_state_action_values = slim.fully_connected(net, self.dim_actions,
                                                 activation_fn=None,
                                                 weights_regularizer=wr,
                                                 trainable=trainable)
    return q_state_action_values
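A hedged usage sketch; the frame-stack shape and the `agent` object are assumptions for illustration (the method expects the stack before height/width, which it transposes to NHWC):

# 4 stacked 84x84 grayscale frames per state:
frames = tf.placeholder(tf.float32, [None, 4, 84, 84])
q_values = agent.create_network(frames, trainable=True)  # shape (None, dim_actions)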
def __arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
    """Defines the VGG arg scope.

    Args:
        weight_decay: The l2 regularization coefficient.

    Returns:
        An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            padding='SAME',
                            data_format=data_format):
            with slim.arg_scope([custom_layers.pad2d,
                                 custom_layers.l2_normalization,
                                 custom_layers.channel_to_last],
                                data_format=data_format) as sc:
                return sc
def cnn_layers(inputs, scope, end_points_collection, dropout_keep_prob=0.8, is_training=True):
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training},
                            activation_fn=leaky_relu):
            net = slim.conv2d(inputs, 32, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3')
            net = slim.conv2d(net, 64, [1, 1], scope='conv4')
            box_net = net = slim.conv2d(net, 128, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool5')
            net = slim.conv2d(net, 256, [3, 3], scope='conv6')
            net = slim.conv2d(net, 128, [1, 1], scope='conv7')
            net = slim.conv2d(net, 256, [3, 3], scope='conv8')
            box_net = _reorg(box_net, 2)
            net = tf.concat([box_net, net], 3)
            net = slim.conv2d(net, 256, [3, 3], scope='conv9')
            net = slim.conv2d(net, 75, [1, 1], activation_fn=None, scope='conv10')
    return net, end_points_collection
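_reorg is defined elsewhere in that project. YOLO-style reorg layers are typically a space-to-depth rearrangement that lets the higher-resolution box_net branch concatenate with the downsampled trunk, so a plausible stand-in (an assumption, not the project's code) is:

def _reorg(net, stride):
    # Move each stride x stride spatial block into the channel dimension,
    # e.g. (N, 26, 26, 128) -> (N, 13, 13, 512) for stride=2.
    return tf.space_to_depth(net, block_size=stride)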
def cnn_layers(inputs, scope, end_points_collection, dropout_keep_prob=0.8, is_training=True):
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
        net = slim.conv2d(inputs, 48, [5, 5], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
        net = slim.conv2d(net, 64, [5, 5], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
        net = slim.conv2d(net, 128, [5, 5], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool3')
        net = slim.conv2d(net, 160, [5, 5], scope='conv4')
        net = slim.conv2d(net, 192, [5, 5], scope='conv5')
        net = slim.conv2d(net, 192, [5, 5], scope='conv6')
        net = slim.conv2d(net, 192, [5, 5], scope='conv7')
        net = slim.flatten(net)
        # Removing the fc layer gives a much smaller model with almost the same performance.
        # net = slim.fully_connected(net, 3072, scope='fc8')
    return net, end_points_collection
def cnn_layers(inputs, scope, end_points_collection, dropout_keep_prob=0.8, is_training=True):
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
        net = slim.conv2d(inputs, 32, [5, 5], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
        net = slim.conv2d(net, 64, [5, 5], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
        net = slim.conv2d(net, 64, [5, 5], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool3')
        net = slim.conv2d(net, 64, [5, 5], scope='conv4')
        net = slim.conv2d(net, 64, [5, 5], scope='conv5')
        net = slim.conv2d(net, 64, [5, 5], scope='conv6')
        net = slim.conv2d(net, 64, [5, 5], scope='conv7')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 256, scope='fc3')
    return net, end_points_collection
def cnn_layers(inputs, scope, end_points_collection, dropout_keep_prob=0.8, is_training=True):
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
        net = slim.conv2d(inputs, 32, [5, 5], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
        net = slim.conv2d(net, 64, [5, 5], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
        net = slim.conv2d(net, 64, [5, 5], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool3')
        net = slim.conv2d(net, 64, [5, 5], scope='conv4')
        net = slim.conv2d(net, 64, [5, 5], scope='conv5')
        net = slim.conv2d(net, 64, [5, 5], scope='conv6')
        net = slim.conv2d(net, 64, [5, 5], scope='conv7')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 128, scope='fc3')
    return net, end_points_collection
def _image_to_head(self, is_training, reuse=False):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def vgg16(inputs, num_classes, batch_size):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], padding="SAME", scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], padding="SAME", scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], padding="SAME", scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], padding="SAME", scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], padding="SAME", scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        net = tf.reshape(net, (batch_size, 7 * 7 * 512))
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.5, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.5, scope='dropout7')
        # Use num_classes rather than a hard-coded 1000 so the argument takes effect.
        net = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc8')
    return net
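A hedged usage sketch: the input must be 224x224 so that pool5 comes out 7x7 to match the hard-coded reshape (the placeholder is illustrative):

images = tf.placeholder(tf.float32, [32, 224, 224, 3])
logits = vgg16(images, num_classes=1000, batch_size=32)  # shape (32, 1000)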
def create_model(self, model_input, num_classes=2, l2_penalty=1e-8, **unused_params):
    net = slim.conv2d(model_input, 64, [3, 3], scope='conv1_1')
    # net = slim.conv2d(net, 64, [3, 3], scope='conv1_2')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    # net = slim.conv2d(net, 128, [3, 3], scope='conv2_1')
    # net = slim.conv2d(net, 128, [3, 3], scope='conv2_2')
    # net = slim.max_pool2d(net, [2, 2], scope='pool2')
    # net = slim.conv2d(net, 258, [3, 3], scope='conv3_1')
    # net = slim.conv2d(net, 258, [3, 3], scope='conv3_2')
    # net = slim.max_pool2d(net, [2, 2], scope='pool3')
    net = slim.flatten(net)
    output = slim.fully_connected(
        net, num_classes - 1, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(l2_penalty))
    return {"predictions": output}