def densenet_block(inputs, layer_num, growth, bc_mode, scope, is_training, keep_prob):
    with tf.variable_scope(scope, 'block1', [inputs]):
        currents = inputs
        for idx in range(layer_num):
            if not bc_mode:
                new_feature = slim.conv2d(currents, growth,
                                          [3, 3], scope='conv_{:d}'.format(idx))
                new_feature = slim.dropout(new_feature, keep_prob=keep_prob,
                                           is_training=is_training,
                                           scope='dropout_{:d}'.format(idx))
            else:
                new_feature = slim.conv2d(currents, growth*4,
                                          [1, 1], scope='bottom_{:d}'.format(idx))
                new_feature = slim.dropout(new_feature, keep_prob=keep_prob,
                                           is_training=is_training,
                                           scope='dropout_b_{:d}'.format(idx))
                new_feature = slim.conv2d(new_feature, growth,
                                          [3, 3], scope='conv_{:d}'.format(idx))
                new_feature = slim.dropout(new_feature, keep_prob=keep_prob,
                                           is_training=is_training,
                                           scope='dropout_{:d}'.format(idx))
            currents = tf.concat([currents, new_feature], axis=3)
        return currents
Example source code for Python dropout()
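The snippets below all use the same pattern: slim.dropout(net, keep_prob, is_training=..., scope=...) randomly zeroes activations and rescales the rest by 1/keep_prob during training, and is an identity op when is_training is False. A minimal, self-contained sketch of that pattern (assuming TensorFlow 1.x with tf.contrib.slim; the tiny_classifier name and layer sizes are hypothetical):

import tensorflow as tf

slim = tf.contrib.slim

def tiny_classifier(inputs, is_training, keep_prob=0.5, num_classes=10):
    # Hypothetical two-layer head, used only to illustrate slim.dropout.
    net = slim.fully_connected(inputs, 64, scope='fc1')
    # Active only when is_training is True; otherwise an identity op.
    net = slim.dropout(net, keep_prob, is_training=is_training, scope='dropout1')
    return slim.fully_connected(net, num_classes, activation_fn=None, scope='logits')

x = tf.placeholder(tf.float32, [None, 128])
logits = tiny_classifier(x, is_training=True)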
def build_inception_v1(self, prediction_fn=tf.nn.relu, scope='InceptionV1'):
    """
    build basic inception v1 model
    """
    # input features [batch_size, height, width, channels]
    self.x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='input_layer')
    self.y = tf.placeholder(tf.float32, [None, self.num_classes], name='output_layer')
    # learning_rate placeholder
    self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    # dropout layer: keep probability placeholder (default value 0.5)
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    with tf.variable_scope(name_or_scope=scope, reuse=False) as scope:
        net, ent_point_nets = self.inception_v1_base(self.x, scope=scope)
        with tf.variable_scope('Logits'):
            net = slim.avg_pool2d(net, kernel_size=[7, 7], stride=1, scope='MaxPool_0a_7x7')
            net = slim.dropout(net, self.keep_prob, scope='Dropout_0b')
            # squeeze [batch, 1, 1, 1024] -> [batch, 1024]
            net = net[:, 0, 0, :]
            self.logits = slim.fully_connected(net, num_outputs=self.num_classes)
            self.read_out_logits = prediction_fn(self.logits, name='Predictions')
def __init__(self):
    self.raw_input_image = tf.placeholder(tf.float32, [None, 784])
    self.input_images = tf.reshape(self.raw_input_image, [-1, 28, 28, 1])
    self.raw_input_label = tf.placeholder("float", [None, 10])
    self.input_labels = tf.cast(self.raw_input_label, tf.int32)
    self.dropout = cfg.KEEP_PROB
    with tf.variable_scope("Lenet") as scope:
        self.train_digits = self.construct_net(True)
        scope.reuse_variables()
        self.pred_digits = self.construct_net(False)
    self.prediction = tf.argmax(self.pred_digits, 1)
    self.correct_prediction = tf.equal(tf.argmax(self.pred_digits, 1), tf.argmax(self.input_labels, 1))
    self.train_accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, "float"))
    self.loss = slim.losses.softmax_cross_entropy(self.train_digits, self.input_labels)
    self.lr = cfg.LEARNING_RATE
    self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
def fc_layers(net,
              scope,
              end_points_collection,
              num_classes=10,
              is_training=True,
              dropout_keep_prob=0.5,
              name_prefix=None):
    def full_scope_name(scope_name):
        return scope_name if name_prefix is None else '%s_%s' % (name_prefix, scope_name)
    with slim.arg_scope([slim.fully_connected, slim.dropout],
                        outputs_collections=[end_points_collection]):
        net = slim.fully_connected(net, num_classes, activation_fn=None,
                                   scope=full_scope_name('fc9'))
    return net, end_points_collection
def fc_layers(net,
              scope,
              end_points_collection,
              num_classes=10,
              is_training=True,
              dropout_keep_prob=0.5,
              name_prefix=None):
    def full_scope_name(scope_name):
        return scope_name if name_prefix is None else '%s_%s' % (name_prefix, scope_name)
    with slim.arg_scope([slim.fully_connected, slim.dropout],
                        outputs_collections=[end_points_collection]):
        '''
        with dropout: accuracy: 0.7, data: 6.3M
        without dropout: accuracy: 0.71, data: 6.3M
        '''
        # net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
        #                    scope=full_scope_name('dropout3'))
        net = slim.fully_connected(net, num_classes, activation_fn=None,
                                   scope=full_scope_name('fc4'))
    return net, end_points_collection
def fc_layers(net,
              scope,
              end_points_collection,
              num_classes=10,
              is_training=True,
              dropout_keep_prob=0.5,
              name_prefix=None):
    def full_scope_name(scope_name):
        return scope_name if name_prefix is None else '%s_%s' % (name_prefix, scope_name)
    with slim.arg_scope([slim.fully_connected, slim.dropout],
                        outputs_collections=[end_points_collection]):
        '''
        with dropout: accuracy: 0.68, data: 4.2M
        without dropout: accuracy: 0.71, data: 4.2M
        '''
        # net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
        #                    scope=full_scope_name('dropout3'))
        net = slim.fully_connected(net, num_classes, activation_fn=None,
                                   scope=full_scope_name('fc4'))
    return net, end_points_collection
def fc_layers(net,
              scope,
              end_points_collection,
              num_classes=10,
              is_training=True,
              dropout_keep_prob=0.5,
              name_prefix=None):
    def full_scope_name(scope_name):
        return scope_name if name_prefix is None else '%s_%s' % (name_prefix, scope_name)
    with slim.arg_scope([slim.fully_connected, slim.dropout],
                        outputs_collections=[end_points_collection]):
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope=full_scope_name('dropout3'))
        net = slim.fully_connected(net, num_classes, activation_fn=None,
                                   scope=full_scope_name('fc4'))
    return net, end_points_collection
def vgg16(inputs, num_classes, batch_size):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], padding="SAME", scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], padding="SAME", scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], padding="SAME", scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], padding="SAME", scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], padding="SAME", scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        net = tf.reshape(net, (batch_size, 7 * 7 * 512))
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.5, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.5, scope='dropout7')
        net = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc8')
        return net
def create_base(self, inputs, is_training):
    params = self._config.cnn_params
    print("input dimension = {}".format(inputs.get_shape()))
    with tf.name_scope('Model'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.relu,
                            # normalizer_fn=slim.batch_norm,
                            # normalizer_params={'is_training': is_training}
                            # weights_initializer=tf.contrib.layers.xavier_initializer(seed=10)
                            ):
            # inputs is 2D with dimension (3 x feature_len)
            net = slim.conv2d(inputs, params['num_filters'][0], [3, 5], scope='conv1')
            net = slim.conv2d(net, params['num_filters'][1], [3, 5], scope='conv2')
            net = slim.conv2d(net, params['num_filters'][2], [3, 5], scope='conv3')
            net = slim.flatten(net, scope='flatten1')
            net = slim.fully_connected(net, params['num_fc_1'], scope='fc1')
            net = slim.dropout(net, self._config.keep_prob, is_training=is_training, scope='dropout1')
            logits = slim.fully_connected(net, self._config.num_classes, activation_fn=None, scope='fc2')
    with tf.name_scope('output'):
        predicted_classes = tf.to_int32(tf.argmax(logits, dimension=1), name='y')
    return logits, predicted_classes
def _head_to_tail(self, pool5, is_training, reuse=False):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True,
                               scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True,
                               scope='dropout7')
    return fc7
def conv_net_kelz(inputs):
    """Builds the ConvNet from Kelz 2016."""
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        activation_fn=tf.nn.relu,
        weights_initializer=tf.contrib.layers.variance_scaling_initializer(
            factor=2.0, mode='FAN_AVG', uniform=True)):
        net = slim.conv2d(inputs, 32, [3, 3], scope='conv1')
        net = slim.conv2d(
            net, 32, [3, 3], scope='conv2', normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
        net = slim.dropout(net, 0.25, scope='dropout2')
        net = slim.conv2d(net, 64, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
        net = slim.dropout(net, 0.25, scope='dropout3')
        # Flatten while preserving batch and time dimensions.
        dims = tf.shape(net)
        net = tf.reshape(net, (dims[0], dims[1],
                               net.shape[2].value * net.shape[3].value), 'flatten4')
        net = slim.fully_connected(net, 512, scope='fc5')
        net = slim.dropout(net, 0.5, scope='dropout5')
        return net
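A hedged usage sketch of conv_net_kelz (the input shape below is an assumption, not from the original snippet): because the flatten keeps the batch and time axes, the trailing slim.fully_connected and slim.dropout calls operate on a rank-3 tensor whose last dimension is 512.

import tensorflow as tf

slim = tf.contrib.slim

# Hypothetical input: 8 spectrogram excerpts, 100 frames, 229 frequency bins, 1 channel.
spec = tf.placeholder(tf.float32, [8, 100, 229, 1])
net = conv_net_kelz(spec)  # rank-3 output of shape [batch, time, 512]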
def squeezenet_inference(inputs, is_training, keep_prob):
    nets = slim.conv2d(inputs, 64,
                       [3, 3], scope='conv1')
    nets = slim.max_pool2d(nets, [3, 3], padding='SAME', scope='pool1')  # 56*48*64
    nets = fire_module(nets, 16, 64, scope='fire2')
    nets = fire_module(nets, 16, 64, scope='fire3')
    nets = slim.max_pool2d(nets, [3, 3], padding='SAME', scope='pool3')  # 28*24*128
    nets = fire_module(nets, 32, 128, scope='fire4')
    nets = fire_module(nets, 32, 128, scope='fire5')
    nets = slim.max_pool2d(nets, [3, 3], padding='SAME', scope='pool5')  # 14*12*256
    nets = fire_module(nets, 48, 192, scope='fire6')
    nets = fire_module(nets, 48, 192, scope='fire7')
    nets = slim.max_pool2d(nets, [3, 3], padding='SAME', scope='pool6')  # 7*6*384
    nets = fire_module(nets, 64, 256, scope='fire8')
    nets = fire_module(nets, 64, 256, scope='fire9')  # 7*6*512
    nets = slim.dropout(nets, keep_prob, is_training=is_training, scope='dropout9')
    nets = slim.avg_pool2d(nets, [7, 6], scope='pool9')  # 1*1*512
    return nets
def transition_block(inputs, reduction, scope, is_training, keep_prob):
    """Apply the H_l composite function with a 1x1 kernel, followed by 2x2
    average pooling.
    """
    with tf.variable_scope(scope, 'trans1', [inputs]):
        # call composite function with 1x1 kernel
        out_features = int(int(inputs.get_shape()[-1]) * reduction)
        nets = slim.conv2d(inputs, out_features,
                           [1, 1], scope='conv')
        nets = slim.dropout(nets, keep_prob=keep_prob,
                            is_training=is_training,
                            scope='dropout')
        # run average pooling
        nets = slim.avg_pool2d(nets, [2, 2], scope='pool')
        return nets
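How the two DenseNet pieces above compose (a hedged sketch; the input size, growth rate, and keep_prob are illustrative assumptions): densenet_block adds layer_num * growth channels by concatenation, and transition_block then compresses the channel count by reduction and halves the spatial resolution.

import tensorflow as tf

slim = tf.contrib.slim

# Hypothetical input: 32x32 feature maps with 24 channels.
images = tf.placeholder(tf.float32, [None, 32, 32, 24])
is_training = tf.placeholder(tf.bool, name='is_training')

# 24 input channels + 6 layers * growth 12 = 96 output channels.
net = densenet_block(images, layer_num=6, growth=12, bc_mode=True,
                     scope='block1', is_training=is_training, keep_prob=0.8)
# reduction=0.5 compresses 96 -> 48 channels; the 2x2 average pool halves 32x32 -> 16x16.
net = transition_block(net, reduction=0.5, scope='trans1',
                       is_training=is_training, keep_prob=0.8)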
def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.xavier_initializer_conv2d(uniform=True),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with tf.variable_scope('squeezenet', [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1')
                net = fire_module(net, 16, 64, scope='fire2')
                net = fire_module(net, 16, 64, scope='fire3')
                net = fire_module(net, 32, 128, scope='fire4')
                net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4')
                net = fire_module(net, 32, 128, scope='fire5')
                net = fire_module(net, 48, 192, scope='fire6')
                net = fire_module(net, 48, 192, scope='fire7')
                net = fire_module(net, 64, 256, scope='fire8')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8')
                net = fire_module(net, 64, 256, scope='fire9')
                net = slim.dropout(net, keep_probability)
                net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv10')
                net = slim.avg_pool2d(net, net.get_shape()[1:3], scope='avgpool10')
                net = tf.squeeze(net, [1, 2], name='logits')
                net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                           scope='Bottleneck', reuse=False)
    return net, None
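A hedged usage sketch of the inference() function above (the 160x160 input size is an assumption, and a fire_module implementation must already be in scope). Because slim.batch_norm and slim.dropout share one arg_scope keyed on phase_train, a single placeholder switches the whole network between training and evaluation behaviour:

import tensorflow as tf

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 160, 160, 3])   # hypothetical input size
phase_train = tf.placeholder(tf.bool, name='phase_train')  # True while training

embeddings, _ = inference(images, keep_probability=0.8,
                          phase_train=phase_train,
                          bottleneck_layer_size=128,
                          weight_decay=5e-4)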
def hidden_layers_starting_at(self, layer, layer_sizes, opts=None):
    # TODO: opts=None => will force exception on old calls....
    if not isinstance(layer_sizes, list):
        layer_sizes = map(int, layer_sizes.split(","))
    assert len(layer_sizes) > 0
    for i, size in enumerate(layer_sizes):
        layer = slim.fully_connected(scope="h%d" % i,
                                     inputs=layer,
                                     num_outputs=size,
                                     weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                                     activation_fn=tf.nn.relu)
        if opts.use_dropout:
            layer = slim.dropout(layer, is_training=IS_TRAINING, scope="do%d" % i)
    return layer
def _head_to_tail(self, pool5, is_training, reuse=None):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True,
                               scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True,
                               scope='dropout7')
    return fc7
def regression_model(inputs, is_training=True, scope="deep_regression"):
    """Creates the regression model.
    Args:
      inputs: A node that yields a `Tensor` of size [batch_size, dimensions].
      is_training: Whether or not we're currently training the model.
      scope: An optional variable_op scope for the model.
    Returns:
      predictions: 1-D `Tensor` of shape [batch_size] of responses.
      end_points: A dict of end points representing the hidden layers.
    """
    with tf.variable_scope(scope, 'deep_regression', [inputs]):
        end_points = {}
        # Set the default weights_regularizer and activation for each fully_connected layer.
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_regularizer=slim.l2_regularizer(0.01)):
            # Creates a fully connected layer from the inputs with 32 hidden units.
            net = slim.fully_connected(inputs, 32, scope='fc1')
            end_points['fc1'] = net
            # Adds a dropout layer to prevent over-fitting.
            net = slim.dropout(net, 0.8, is_training=is_training)
            # Adds another fully connected layer with 16 hidden units.
            net = slim.fully_connected(net, 16, scope='fc2')
            end_points['fc2'] = net
            # Creates a fully-connected layer with a single hidden unit. Note that the
            # layer is made linear by setting activation_fn=None.
            predictions = slim.fully_connected(net, 1, activation_fn=None, scope='prediction')
            end_points['out'] = predictions
            return predictions, end_points
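A hedged usage sketch of regression_model (the placeholder shapes and optimizer choice are assumptions): the weights_regularizer set in the arg_scope registers l2 penalties in the regularization-losses collection, so tf.losses.get_total_loss() combines them with the data term.

import tensorflow as tf

slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [None, 1])
targets = tf.placeholder(tf.float32, [None, 1])

predictions, end_points = regression_model(inputs, is_training=True)
# Data term; registered in the tf.losses collection automatically.
mse = tf.losses.mean_squared_error(labels=targets, predictions=predictions)
# Total = data term + l2 regularization registered by the arg_scope above.
total_loss = tf.losses.get_total_loss()
train_op = tf.train.AdamOptimizer(0.005).minimize(total_loss)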
def __dropout(self, net):
    net_shape = net.get_shape().as_list()
    noise_shape = [net_shape[0], 1, 1, net_shape[-1]]
    return slim.dropout(net, noise_shape=noise_shape)
def get_model(self, inputs, weight_decay=0.0005, is_training=False):
    # End_points collect relevant activations for external use.
    arg_scope = self.__arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        end_points = {}
        with tf.variable_scope('vgg_16', [inputs]):
            # Original VGG-16 blocks.
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            end_points['block1'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            # Block 2.
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            end_points['block2'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            # Block 3.
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            end_points['block3'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            # Block 4.
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            end_points['block4'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            # Block 5.
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            end_points['block5'] = net
            net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
            # Additional SSD blocks.
            keep_prob = 0.8
            with slim.arg_scope([slim.conv2d],
                                activation_fn=None):
                with slim.arg_scope([slim.batch_norm],
                                    activation_fn=tf.nn.relu, is_training=is_training, updates_collections=None):
                    with slim.arg_scope([slim.dropout],
                                        is_training=is_training, keep_prob=keep_prob):
                        with tf.variable_scope(self.model_name):
                            return self.__additional_ssd_block(end_points, net)
def hidden_layers_on(self, layer, layer_sizes):
    if not isinstance(layer_sizes, list):
        layer_sizes = map(int, layer_sizes.split(","))
    assert len(layer_sizes) > 0
    for i, size in enumerate(layer_sizes):
        layer = slim.fully_connected(scope="h%d" % i,
                                     inputs=layer,
                                     num_outputs=size,
                                     weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                                     activation_fn=tf.nn.relu)
        # if opts.use_dropout:
        #     layer = slim.dropout(layer, is_training=IS_TRAINING, scope="do%d" % i)
    return layer
def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    n = incoming.get_shape().as_list()[-1]
    stride = 1
    if increase_dim:
        n *= 2
        stride = 2
    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
        normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/1")
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)
    incoming = slim.dropout(incoming, keep_prob=0.6)
    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
        normalizer_fn=None, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/2")
    return incoming
def _network_factory(num_classes, is_training, weight_decay=1e-8):
    def factory_fn(image, reuse, l2_normalize):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected,
                                 slim.batch_norm, slim.layer_norm],
                                reuse=reuse):
                features, logits = _create_network(
                    image, num_classes, l2_normalize=l2_normalize,
                    reuse=reuse, create_summaries=is_training,
                    weight_decay=weight_decay)
                return features, logits
    return factory_fn
def construct_net(self, is_trained=True):
    with slim.arg_scope([slim.conv2d], padding='VALID',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.conv2d(self.input_images, 6, [5, 5], 1, padding='SAME', scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(net, 16, [5, 5], 1, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.conv2d(net, 120, [5, 5], 1, scope='conv5')
        net = slim.flatten(net, scope='flat6')
        net = slim.fully_connected(net, 84, scope='fc7')
        net = slim.dropout(net, self.dropout, is_training=is_trained, scope='dropout8')
        digits = slim.fully_connected(net, 10, scope='fc9')
    return digits
def cnn_layers(inputs, scope, end_points_collection, dropout_keep_prob=0.8, is_training=True):
    # Collect outputs for conv2d and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
        net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                          scope='conv1')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
        net = slim.conv2d(net, 192, [5, 5], scope='conv2')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
        net = slim.conv2d(net, 384, [3, 3], scope='conv3')
        net = slim.conv2d(net, 384, [3, 3], scope='conv4')
        net = slim.conv2d(net, 256, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
        with slim.arg_scope([slim.conv2d],
                            weights_initializer=trunc_normal(0.005),
                            biases_initializer=tf.constant_initializer(0.1),
                            outputs_collections=[end_points_collection]):
            net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                              scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
    return net, end_points_collection
def create_base(self, inputs, is_training):
    """Creates a base part of the Model (no gradients, losses or summaries)."""
    with tf.name_scope('Model'):
        with slim.arg_scope([slim.fully_connected], activation_fn=tf.nn.relu,
                            # weights_regularizer=slim.l2_regularizer(0.01),
                            # weights_initializer=initializers.xavier_initializer(seed=self._config.random_seed),
                            # biases_initializer=tf.constant_initializer(0.1)
                            ):
            # first fully connected layer
            net = slim.fully_connected(inputs, self._config.mlp_params['hidden_sizes'][0], scope='fc1')
            # dropout1
            net = slim.dropout(net, self._config.keep_prob, is_training=is_training, scope='dropout1')
            # second fully connected layer
            net = slim.fully_connected(net, self._config.mlp_params['hidden_sizes'][1], scope='fc2')
            # dropout2
            net = slim.dropout(net, self._config.keep_prob, is_training=is_training, scope='dropout2')
            # final fully-connected dense layer
            logits = slim.fully_connected(net, self._config.num_classes, activation_fn=None, scope='fc3')
    with tf.name_scope('output'):
        predicted_classes = tf.to_int32(tf.argmax(logits, dimension=1), name='y')
    return logits, predicted_classes
def inference_network(x, xwidth=28, xheight=28, zdim=2):
    """Inference network to parameterize variational model. It takes
    data as input and outputs the variational parameters.
    mu, sigma = neural_network(x)
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'scale': True}):
        net = tf.reshape(x, [N_MINIBATCH, 28, 28, 1])
        net = slim.conv2d(net, 32, 5, stride=2)
        net = slim.conv2d(net, 64, 5, stride=2)
        net = slim.conv2d(net, 128, 5, padding='VALID')
        net = slim.dropout(net, 0.9)
        net = slim.flatten(net)
        params = slim.fully_connected(net, zdim * 2, activation_fn=None)
    mu = params[:, :zdim]
    # sigma = tf.nn.softplus(params[:, zdim:])
    sigma = params[:, zdim:]
    return mu, sigma
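The function above returns the variational parameters of q(z | x). A hedged sketch of how they would typically be consumed (the N_MINIBATCH value and the softplus transform are assumptions mirroring the snippet; this is not the author's objective code):

import tensorflow as tf

slim = tf.contrib.slim

N_MINIBATCH = 128   # hypothetical minibatch size used inside inference_network
zdim = 2

x = tf.placeholder(tf.float32, [N_MINIBATCH, 28 * 28])
mu, sigma = inference_network(x, zdim=zdim)

# Reparameterized sample z ~ N(mu, scale^2); softplus keeps the scale positive,
# mirroring the commented-out line in the snippet above.
eps = tf.random_normal(tf.shape(mu))
z = mu + tf.nn.softplus(sigma) * eps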
##########################################
# make variational lower bound objective #
##########################################
def create_model(self,
                 model_input,
                 vocab_size,
                 keep_prob,
                 num_mixtures=None,
                 l2_penalty=1e-8,
                 **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    new_model_input = slim.dropout(model_input, keep_prob=keep_prob)
    # new_model_input = model_input
    gate_activations = slim.fully_connected(
        new_model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_activations = slim.fully_connected(
        new_model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")
    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures
    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return {"predictions": final_probabilities,
            "features": model_input}
def create_model(self,
                 model_input,
                 vocab_size,
                 keep_prob,
                 num_mixtures=None,
                 l2_penalty=1e-8,
                 **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    new_model_input = slim.dropout(model_input, keep_prob=keep_prob)
    # new_model_input = model_input
    gate_activations = slim.fully_connected(
        new_model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_activations = slim.fully_connected(
        new_model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")
    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures
    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return {"predictions": final_probabilities,
            "zhaofeatures": model_input}