def create_architecture(self, mode, tag=None):
    """Build the network and its losses under shared slim layer defaults.

    Args:
        mode: Mode string such as 'TRAIN' or 'TEST'. Accepted for interface
            compatibility; this method does not consult it.
        tag: Required identifier; must not be None.

    Returns:
        The value returned by ``self.add_losses()``.

    Raises:
        AssertionError: If ``tag`` is None.
    """
    # Identity test is the correct way to check for None; `!=` can be
    # overridden by objects that overload comparison operators.
    assert tag is not None

    # Handle most of the regularizers here: a single L2 regularizer is
    # shared between weights and biases.
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    biases_regularizer = weights_regularizer

    # List as many types of layers as possible, even if they are not used now.
    with arg_scope([slim.conv2d, slim.conv2d_in_plane,
                    slim.conv2d_transpose, slim.separable_conv2d,
                    slim.fully_connected],
                   weights_regularizer=weights_regularizer,
                   biases_regularizer=biases_regularizer,
                   biases_initializer=tf.constant_initializer(0.0)):
        self.build_network()

    # NOTE(review): the flattened source does not show whether add_losses()
    # ran inside the arg_scope above; it is placed after it here -- confirm.
    elbo = self.add_losses()
    self._summary_op = tf.summary.merge_all()
    return elbo
python类conv2d_transpose()的实例源码
def upsample(x,scale=2,features=64,activation=tf.nn.relu):
    """Upsample ``x`` by ``scale`` using convolution followed by ``PS``.

    Args:
        x: Input tensor fed to the first 3x3 convolution.
        scale: Upsampling factor; must be 2, 3 or 4.
        features: Number of output channels of the first convolution.
        activation: Activation function applied by every convolution.

    Returns:
        The tensor produced by the last ``PS`` call.

    Raises:
        AssertionError: If ``scale`` is not one of 2, 3, 4.
    """
    assert scale in [2,3,4]
    x = slim.conv2d(x,features,[3,3],activation_fn=activation)

    # Each stage is (conv output channels, PS factor). A x4 upsample is
    # carried out as two consecutive x2 stages.
    stages = []
    if scale == 2:
        stages = [(3*(scale**2), 2)]
    elif scale == 3:
        stages = [(3*(scale**2), 3)]
    elif scale == 4:
        stages = [(3*(2**2), 2)] * 2

    for shuffle_channels, factor in stages:
        x = slim.conv2d(x,shuffle_channels,[3,3],activation_fn=activation)
        x = PS(x,factor,color=True)
    return x
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    """Create a slim arg_scope with shared defaults for conv and FC layers.

    Args:
        weight_decay: Coefficient passed to ``slim.l2_regularizer``.
        activation_fn: Default activation function for the covered layers.
        normalizer_fn: Default normalizer function for the covered layers.

    Returns:
        The scope object yielded by the innermost ``slim.arg_scope``, which is
        entered while the conv/conv-transpose scope is still active.
    """
    def _shared_defaults():
        # Build fresh regularizer/initializer objects for each scope, exactly
        # as two separate literal argument lists would.
        return dict(
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
        )

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME', **_shared_defaults()):
        with slim.arg_scope([slim.fully_connected],
                            **_shared_defaults()) as combined_scope:
            return combined_scope
def build_model(self):
    """Prepare image pyramids and the model input, then build the encoder.

    Sets ``self.left_pyramid``, ``self.right_pyramid`` (only when
    ``self.mode == 'train'``) and ``self.model_input``, then dispatches on
    ``self.params.encoder`` to ``build_vgg`` or ``build_resnet50``.

    Returns:
        None in every case; an unrecognised encoder name builds nothing.
    """
    conv_layers = [slim.conv2d, slim.conv2d_transpose]
    with slim.arg_scope(conv_layers, activation_fn=tf.nn.elu), \
            tf.variable_scope('model', reuse=self.reuse_variables):
        self.left_pyramid = self.scale_pyramid(self.left, 4)
        if self.mode == 'train':
            self.right_pyramid = self.scale_pyramid(self.right, 4)

        # Stereo mode concatenates both views along axis 3.
        if self.params.do_stereo:
            self.model_input = tf.concat([self.left, self.right], 3)
        else:
            self.model_input = self.left

        # Build the model for the configured encoder.
        encoder_name = self.params.encoder
        if encoder_name == 'vgg':
            self.build_vgg()
        elif encoder_name == 'resnet50':
            self.build_resnet50()
    return None
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    """Create a slim arg_scope with shared defaults for conv and FC layers.

    Args:
        weight_decay: Coefficient passed to ``slim.l2_regularizer``.
        activation_fn: Default activation function for the covered layers.
        normalizer_fn: Default normalizer function for the covered layers.

    Returns:
        The scope object yielded by the innermost ``slim.arg_scope``, which is
        entered while the conv/conv-transpose scope is still active.
    """
    def _shared_defaults():
        # Build fresh regularizer/initializer objects for each scope, exactly
        # as two separate literal argument lists would.
        return dict(
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
        )

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME', **_shared_defaults()):
        with slim.arg_scope([slim.fully_connected],
                            **_shared_defaults()) as combined_scope:
            return combined_scope
def build_model(input, image_size=64):
    """Build a generator: linear projection followed by four transposed convs.

    The input is projected to ``2 * image_size * image_size`` units, reshaped
    to ``[-1, image_size // 16, image_size // 16, 512]`` and passed through
    four stride-2 5x5 transposed convolutions (256, 128, 64, 3 channels).
    The first three are each preceded by BatchNorm + ReLU; the last is
    followed by tanh.

    Args:
        input: Tensor handed to the project-level ``linear`` helper.
        image_size: Side length used to size the projection and reshape.

    Returns:
        The tanh-activated output tensor of the final transposed convolution.
    """
    # kernel_size / stride / activation_fn are supplied to every
    # conv2d_transpose below through the arg_scope.
    with slim.arg_scope([slim.conv2d_transpose], kernel_size=[5, 5], stride=2,
                        activation_fn=None):
        net = linear(input, 2 * image_size * image_size, 'generator/linear_1')  # output_size=2^13 when image_size=64
        net = tf.reshape(net, [-1, image_size // 16, image_size // 16, 512], name='generator/reshape_2')
        net = BatchNorm(net, name="batch_norm_3")
        net = tf.nn.relu(net)
        # slim layers take `scope`, not `name`; passing `name=` raises
        # TypeError (unexpected keyword argument).
        net = slim.conv2d_transpose(inputs=net, num_outputs=256, padding="SAME", scope="generator/deconv_4")
        net = BatchNorm(net, name="batch_norm_5")
        net = tf.nn.relu(net)
        net = slim.conv2d_transpose(inputs=net, num_outputs=128, padding="SAME", scope="generator/deconv_6")
        net = BatchNorm(net, name="batch_norm_7")
        net = tf.nn.relu(net)
        net = slim.conv2d_transpose(inputs=net, num_outputs=64, padding="SAME", scope="generator/deconv_8")
        net = BatchNorm(net, name="batch_norm_9")
        net = tf.nn.relu(net)
        net = slim.conv2d_transpose(inputs=net, num_outputs=3, padding="SAME", scope="generator/deconv_10")
        net = tf.nn.tanh(net)
        return net
def build_model(self):
    """Prepare image pyramids and the model input, then build the encoder.

    Sets ``self.left_pyramid``, ``self.right_pyramid`` (only when
    ``self.mode == 'train'``) and ``self.model_input``, then dispatches on
    ``self.params.encoder`` to ``build_vgg`` or ``build_resnet50``.

    Returns:
        None in every case; an unrecognised encoder name builds nothing.
    """
    conv_layers = [slim.conv2d, slim.conv2d_transpose]
    with slim.arg_scope(conv_layers, activation_fn=tf.nn.elu), \
            tf.variable_scope('model', reuse=self.reuse_variables):
        self.left_pyramid = self.scale_pyramid(self.left, 4)
        if self.mode == 'train':
            self.right_pyramid = self.scale_pyramid(self.right, 4)

        # Stereo mode concatenates both views along axis 3.
        if self.params.do_stereo:
            self.model_input = tf.concat([self.left, self.right], 3)
        else:
            self.model_input = self.left

        # Build the model for the configured encoder.
        encoder_name = self.params.encoder
        if encoder_name == 'vgg':
            self.build_vgg()
        elif encoder_name == 'resnet50':
            self.build_resnet50()
    return None
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    """Create a slim arg_scope with shared defaults for conv and FC layers.

    Args:
        weight_decay: Coefficient passed to ``slim.l2_regularizer``.
        activation_fn: Default activation function for the covered layers.
        normalizer_fn: Default normalizer function for the covered layers.

    Returns:
        The scope object yielded by the innermost ``slim.arg_scope``, which is
        entered while the conv/conv-transpose scope is still active.
    """
    def _shared_defaults():
        # Build fresh regularizer/initializer objects for each scope, exactly
        # as two separate literal argument lists would.
        return dict(
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
        )

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME', **_shared_defaults()):
        with slim.arg_scope([slim.fully_connected],
                            **_shared_defaults()) as combined_scope:
            return combined_scope
def generative_network(z, zdim):
    """Map latent variables to likelihood logits with transposed convolutions.

    ``z`` is reshaped to ``[N_MINIBATCH, 1, 1, zdim]`` and passed through four
    ``slim.conv2d_transpose`` layers; the result is flattened.

    Args:
        z: Latent-variable tensor.
        zdim: Latent variable dimension used in the reshape.

    Returns:
        The flattened output of the last layer. No sigmoid is applied here.
    """
    deconv_defaults = dict(
        activation_fn=tf.nn.elu,
        normalizer_fn=slim.batch_norm,
        normalizer_params={'scale': True},
    )
    with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults):
        latent = tf.reshape(z, [N_MINIBATCH, 1, 1, zdim])
        hidden = slim.conv2d_transpose(latent, 128, 3, padding='VALID')
        hidden = slim.conv2d_transpose(hidden, 64, 5, padding='VALID')
        hidden = slim.conv2d_transpose(hidden, 32, 5, stride=2)
        # Only the activation is disabled on the output layer; the scope's
        # normalizer settings still apply to it.
        logits = slim.conv2d_transpose(hidden, 1, 5, stride=2, activation_fn=None)
        return slim.flatten(logits)
def generator(self, inputs, reuse=False):
    """Generate a single-channel image with four transposed convolutions.

    Args:
        inputs: Input tensor, documented by the original author as
            (batch, 1, 1, 128).
        reuse: Passed to ``tf.variable_scope('generator', ...)``.

    Returns:
        Output of the final tanh-activated transposed convolution.
    """
    # The three context managers nest left to right: variable scope, then
    # conv-transpose defaults, then batch-norm defaults.
    with tf.variable_scope('generator', reuse=reuse), \
            slim.arg_scope([slim.conv2d_transpose], padding='SAME', activation_fn=None,
                           stride=2, weights_initializer=tf.contrib.layers.xavier_initializer()), \
            slim.arg_scope([slim.batch_norm], decay=0.95, center=True, scale=True,
                           activation_fn=tf.nn.relu, is_training=(self.mode=='train')):
        # Shape comments are the original author's, assuming the input above.
        hidden = slim.conv2d_transpose(inputs, 512, [4, 4], padding='VALID', scope='conv_transpose1')  # (batch_size, 4, 4, 512)
        hidden = slim.batch_norm(hidden, scope='bn1')
        hidden = slim.conv2d_transpose(hidden, 256, [3, 3], scope='conv_transpose2')  # (batch_size, 8, 8, 256)
        hidden = slim.batch_norm(hidden, scope='bn2')
        hidden = slim.conv2d_transpose(hidden, 128, [3, 3], scope='conv_transpose3')  # (batch_size, 16, 16, 128)
        hidden = slim.batch_norm(hidden, scope='bn3')
        image = slim.conv2d_transpose(hidden, 1, [3, 3], activation_fn=tf.nn.tanh, scope='conv_transpose4')  # (batch_size, 32, 32, 1)
        return image
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    """Create a slim arg_scope with shared defaults for conv and FC layers.

    Args:
        weight_decay: Coefficient passed to ``slim.l2_regularizer``.
        activation_fn: Default activation function for the covered layers.
        normalizer_fn: Default normalizer function for the covered layers.

    Returns:
        The scope object yielded by the innermost ``slim.arg_scope``, which is
        entered while the conv/conv-transpose scope is still active.
    """
    def _shared_defaults():
        # Build fresh regularizer/initializer objects for each scope, exactly
        # as two separate literal argument lists would.
        return dict(
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
        )

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME', **_shared_defaults()):
        with slim.arg_scope([slim.fully_connected],
                            **_shared_defaults()) as combined_scope:
            return combined_scope
def upsample_layer(x, scope = None, scale = 2, mode = 'bilinear'):
    """Upsample ``x`` and project the result to 3 channels.

    Args:
        x: Input tensor. In 'bilinear' mode its static shape is read, so
            dimensions 1 and 2 must be known.
        scope: Scope for the transposed convolution; only used in 'deconv'
            mode.
        scale: Upsampling factor (deconv stride, or resize multiplier).
        mode: 'deconv' (transposed conv + 3x3 conv) or 'bilinear'
            (``tf.image.resize_images`` + 1x1 conv).

    Returns:
        The 3-channel tensor, or None when ``mode`` is neither value.
    """
    if mode == 'deconv':
        upsampled = slim.conv2d_transpose(x, 64, [4,4], stride = scale, activation_fn = lrelu, scope = scope)
        return slim.conv2d(upsampled, 3, [3,3], activation_fn = None)

    if mode == 'bilinear':
        static_shape = x.get_shape().as_list()
        target_size = (scale*static_shape[1], scale*static_shape[2])
        resized = tf.image.resize_images(x, target_size)
        return slim.conv2d(resized, 3, [1,1], activation_fn = None)

    return None
def feature_extract_net(self, lr_image):
    """Build the per-level feature/residual stack and collect level outputs.

    For each of ``self.level`` levels: ``self.depth`` 3x3 convolutions, one
    stride-2 transposed convolution, then a 3-channel 3x3 convolution whose
    output is stored and also feeds the next level.

    Args:
        lr_image: Input tensor for the first convolution.

    Returns:
        Dict mapping 'residual_level_<l>' to that level's 3-channel output.
    """
    end_points = {}
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn = lrelu):
        features = slim.conv2d(lr_image, self.nfc, [3,3], scope = 'conv1')
        for l in range(self.level):
            for d in range(self.depth):
                features = slim.conv2d(features, self.nfc, [3,3], scope = 'conv_%d_level_%d'%(l,d))
            # One upsampling step and one 3-channel projection per level;
            # their scope names depend only on the level index.
            features = slim.conv2d_transpose(features, self.nfc, [4,4], stride = 2, scope = 'residual_level_%d'%(l))
            features = slim.conv2d(features, 3, [3,3], activation_fn = None, scope = 'conv_level_%d'%(l))
            end_points['residual_level_%d'%(l)] = features
    return end_points
def bottleneck_trans_same(inputs, depth, depth_bottleneck, stride, rate=1,
                          outputs_collections=None, scope=None):
    """Bottleneck residual unit built from transposed convolutions ('SAME').

    Both branches use ``slim.conv2d_transpose``. The shortcut is a 3x3 layer
    with the given stride; the residual branch is 1x1 -> 3x3 (strided) -> 1x1.
    The two are summed and passed through ReLU.

    Args:
        inputs: A tensor of size [batch, height, width, channels].
        depth: Output depth of the unit.
        depth_bottleneck: Depth of the bottleneck layers.
        stride: Stride of the shortcut and of the 3x3 residual layer.
        rate: Accepted for signature compatibility; not used by this function.
        outputs_collections: Collection to add the unit output to.
        scope: Optional variable_scope.

    Returns:
        The unit's output, registered via ``collect_named_outputs``.
    """
    pad_mode = 'SAME'
    with tf.variable_scope(scope, 'bottleneck_trans', [inputs]) as sc:
        skip = slim.conv2d_transpose(inputs, depth, 3, stride=stride,
                                     activation_fn=None, scope='shortcut',
                                     padding=pad_mode)

        branch = slim.conv2d_transpose(inputs, depth_bottleneck, [1, 1],
                                       stride=1, scope='conv1_trans')
        branch = slim.conv2d_transpose(branch, depth_bottleneck, 3,
                                       stride=stride, scope='conv2',
                                       padding=pad_mode)
        branch = slim.conv2d_transpose(branch, depth, [1, 1], stride=1,
                                       activation_fn=None, scope='conv3_trans')

        activated = tf.nn.relu(skip + branch)
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                activated)
def bottleneck_trans_valid(inputs, depth, depth_bottleneck, stride, rate=1,
                           outputs_collections=None, scope=None):
    """Bottleneck residual unit built from transposed convolutions ('VALID').

    Both branches use ``slim.conv2d_transpose``. The shortcut is a 3x3 layer
    with the given stride; the residual branch is 1x1 -> 3x3 (strided) -> 1x1.
    The two are summed and passed through ReLU.

    Args:
        inputs: A tensor of size [batch, height, width, channels].
        depth: Output depth of the unit.
        depth_bottleneck: Depth of the bottleneck layers.
        stride: Stride of the shortcut and of the 3x3 residual layer.
        rate: Accepted for signature compatibility; not used by this function.
        outputs_collections: Collection to add the unit output to.
        scope: Optional variable_scope.

    Returns:
        The unit's output, registered via ``collect_named_outputs``.
    """
    pad_mode = 'VALID'
    with tf.variable_scope(scope, 'bottleneck_trans', [inputs]) as sc:
        skip = slim.conv2d_transpose(inputs, depth, 3, stride=stride,
                                     activation_fn=None, scope='shortcut',
                                     padding=pad_mode)

        branch = slim.conv2d_transpose(inputs, depth_bottleneck, [1, 1],
                                       stride=1, scope='conv1_trans')
        branch = slim.conv2d_transpose(branch, depth_bottleneck, 3,
                                       stride=stride, scope='conv2',
                                       padding=pad_mode)
        branch = slim.conv2d_transpose(branch, depth, [1, 1], stride=1,
                                       activation_fn=None, scope='conv3_trans')

        activated = tf.nn.relu(skip + branch)
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                activated)
def bottleneck_trans_same(inputs, depth, depth_bottleneck, stride, rate=1,
                          outputs_collections=None, scope=None):
    """Bottleneck residual unit built from transposed convolutions ('SAME').

    Both branches use ``slim.conv2d_transpose``. The shortcut is a 3x3 layer
    with the given stride; the residual branch is 1x1 -> 3x3 (strided) -> 1x1.
    The two are summed and passed through ReLU.

    Args:
        inputs: A tensor of size [batch, height, width, channels].
        depth: Output depth of the unit.
        depth_bottleneck: Depth of the bottleneck layers.
        stride: Stride of the shortcut and of the 3x3 residual layer.
        rate: Accepted for signature compatibility; not used by this function.
        outputs_collections: Collection to add the unit output to.
        scope: Optional variable_scope.

    Returns:
        The unit's output, registered via ``collect_named_outputs``.
    """
    pad_mode = 'SAME'
    with tf.variable_scope(scope, 'bottleneck_trans', [inputs]) as sc:
        skip = slim.conv2d_transpose(inputs, depth, 3, stride=stride,
                                     activation_fn=None, scope='shortcut',
                                     padding=pad_mode)

        branch = slim.conv2d_transpose(inputs, depth_bottleneck, [1, 1],
                                       stride=1, scope='conv1_trans')
        branch = slim.conv2d_transpose(branch, depth_bottleneck, 3,
                                       stride=stride, scope='conv2',
                                       padding=pad_mode)
        branch = slim.conv2d_transpose(branch, depth, [1, 1], stride=1,
                                       activation_fn=None, scope='conv3_trans')

        activated = tf.nn.relu(skip + branch)
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                activated)
def bottleneck_trans_valid(inputs, depth, depth_bottleneck, stride, rate=1,
                           outputs_collections=None, scope=None):
    """Bottleneck residual unit built from transposed convolutions ('VALID').

    Both branches use ``slim.conv2d_transpose``. The shortcut is a 3x3 layer
    with the given stride; the residual branch is 1x1 -> 3x3 (strided) -> 1x1.
    The two are summed and passed through ReLU.

    Args:
        inputs: A tensor of size [batch, height, width, channels].
        depth: Output depth of the unit.
        depth_bottleneck: Depth of the bottleneck layers.
        stride: Stride of the shortcut and of the 3x3 residual layer.
        rate: Accepted for signature compatibility; not used by this function.
        outputs_collections: Collection to add the unit output to.
        scope: Optional variable_scope.

    Returns:
        The unit's output, registered via ``collect_named_outputs``.
    """
    pad_mode = 'VALID'
    with tf.variable_scope(scope, 'bottleneck_trans', [inputs]) as sc:
        skip = slim.conv2d_transpose(inputs, depth, 3, stride=stride,
                                     activation_fn=None, scope='shortcut',
                                     padding=pad_mode)

        branch = slim.conv2d_transpose(inputs, depth_bottleneck, [1, 1],
                                       stride=1, scope='conv1_trans')
        branch = slim.conv2d_transpose(branch, depth_bottleneck, 3,
                                       stride=stride, scope='conv2',
                                       padding=pad_mode)
        branch = slim.conv2d_transpose(branch, depth, [1, 1], stride=1,
                                       activation_fn=None, scope='conv3_trans')

        activated = tf.nn.relu(skip + branch)
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                activated)
def _deconv(self, x, num_out_layers, kernel_size, scale):
    """Pad, apply a transposed convolution, then crop the borders.

    Args:
        x: Input tensor indexed as a 4-D tensor by the padding spec.
        num_out_layers: Number of output channels.
        kernel_size: Kernel size of the transposed convolution.
        scale: Passed positionally as the stride argument.

    Returns:
        The transposed-convolution output with the first 3 and last 1
        rows/columns removed on axes 1 and 2.
    """
    # One element of padding before and after on axes 1 and 2 only.
    border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, border)
    upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size, scale, 'SAME')
    cropped = upsampled[:,3:-1,3:-1,:]
    return cropped
def deconv(self, x, num_out_layers, kernel_size, scale):
    """Pad, apply a transposed convolution, then crop the borders.

    Args:
        x: Input tensor indexed as a 4-D tensor by the padding spec.
        num_out_layers: Number of output channels.
        kernel_size: Kernel size of the transposed convolution.
        scale: Passed positionally as the stride argument.

    Returns:
        The transposed-convolution output with the first 3 and last 1
        rows/columns removed on axes 1 and 2.
    """
    # One element of padding before and after on axes 1 and 2 only.
    border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, border)
    upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size, scale, 'SAME')
    cropped = upsampled[:,3:-1,3:-1,:]
    return cropped
def generator(noise):
    """Build the generator: linear projection plus four transposed convs.

    Reads the module-level names ``z_dim``, ``IMAGE_SIZE`` and
    ``train_phase``.

    Args:
        noise: Tensor multiplied with a ``[z_dim, 2 * IMAGE_SIZE * IMAGE_SIZE]``
            weight matrix.

    Returns:
        The tanh-activated output of the last transposed convolution.
    """
    deconv_defaults = dict(
        weights_initializer=tf.truncated_normal_initializer(stddev=0.045),
        biases_initializer=tf.constant_initializer(value=0),
        activation_fn=None,
    )
    bn_defaults = dict(
        is_training=train_phase, decay=0.9, epsilon=1e-5,
        param_initializers={
            "beta": tf.constant_initializer(value=0),
            "gamma": tf.random_normal_initializer(mean=1, stddev=0.045)
        },
    )
    # (output channels, deconv scope, batch-norm scope) for the hidden stages.
    hidden_stages = (
        (256, "Generator/deconv_2", "Generator/bn_2"),
        (128, "Generator/deconv_3", "Generator/bn_3"),
        (64, "Generator/deconv_4", "Generator/bn_4"),
    )

    with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults), \
            slim.arg_scope([slim.batch_norm], **bn_defaults):
        flat_dim = 2 * IMAGE_SIZE * IMAGE_SIZE
        weight = tf.get_variable('Generator/W', [z_dim, flat_dim], initializer=tf.truncated_normal_initializer(stddev=0.045))
        bias = tf.get_variable("Generator/b", [flat_dim], initializer=tf.constant_initializer(0))

        # Linear projection, reshaped to a small 512-channel feature map.
        hidden = tf.add(tf.matmul(noise, weight, name="Generator/out_1_matmul"), bias, name="Generator/out_1_add")
        hidden = tf.reshape(hidden, [-1, IMAGE_SIZE // 16 , IMAGE_SIZE // 16, 512], name="Generator/out_1_reshape")
        hidden = slim.batch_norm(inputs=hidden, activation_fn=tf.nn.relu, scope="Generator/bn_1")

        for channels, deconv_scope, bn_scope in hidden_stages:
            hidden = slim.conv2d_transpose(hidden, num_outputs=channels, kernel_size=[5, 5], stride=2, padding="SAME", scope=deconv_scope)
            hidden = slim.batch_norm(inputs=hidden, activation_fn=tf.nn.relu, scope=bn_scope)

        # Output layer: no batch norm, tanh activation.
        image = slim.conv2d_transpose(hidden, num_outputs=3, kernel_size=[5, 5], stride=2, padding="SAME", scope="Generator/deconv_5")
        image = tf.nn.tanh(image, name="Generator/tanh_5")
        return image
def deconv(self, x, num_out_layers, kernel_size, scale):
    """Pad, apply a transposed convolution, then crop the borders.

    Args:
        x: Input tensor indexed as a 4-D tensor by the padding spec.
        num_out_layers: Number of output channels.
        kernel_size: Kernel size of the transposed convolution.
        scale: Passed positionally as the stride argument.

    Returns:
        The transposed-convolution output with the first 3 and last 1
        rows/columns removed on axes 1 and 2.
    """
    # One element of padding before and after on axes 1 and 2 only.
    border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, border)
    upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size, scale, 'SAME')
    cropped = upsampled[:,3:-1,3:-1,:]
    return cropped
def equirectangular_net(self):
    """Build the equirectangular network and store its outputs on ``self``.

    Sets ``self.top_pyramid``, ``self.depth_scale``, ``self.disparity_scale``,
    ``self.bottom_pyramid`` (only when ``self.mode == 'train'``) and, when
    ``self.params.output_mode`` is 'indirect', 'direct' or 'attenuate',
    ``self.outputs``.

    Returns:
        None.
    """
    # NOTE(review): the flattened source does not show where the `with`
    # blocks end; everything is kept inside the "model" scope -- confirm.
    conv_layers = [slim.conv2d, slim.conv2d_transpose]
    with slim.arg_scope(conv_layers, activation_fn = tf.nn.elu), \
            tf.variable_scope("model", reuse = self.reuse_variables) as scope:
        # Calculate pyramid for equirectangular top image.
        self.top_pyramid = self.scale_pyramid(self.top, 4)

        with tf.variable_scope("scaling"):
            self.depth_scale = tf.constant(0.25, shape = [1])
            self.disparity_scale = tf.get_variable(
                "disparity_scale", shape = [1], trainable = False,
                initializer = tf.constant_initializer(1.0 / np.pi))

        # Choose the encoder variant from the parameter flags.
        if self.params.dropout:
            def encoder(image):
                return self.dropout_resnet50(image, scope)
        elif self.params.noise:
            def encoder(image):
                return self.noisy_resnet50(image, scope)
        else:
            def encoder(image):
                return self.resnet50(image, False)

        if self.mode == 'train':
            # Calculate pyramid for equirectangular bottom image.
            self.bottom_pyramid = self.scale_pyramid(self.bottom, 4)

        # With test_crop, the same number of rows is removed from the start
        # and end of axis 1 before the encoder, and restore() is applied to
        # each output afterwards.
        cropping = self.params.test_crop
        if cropping:
            crop_height = int(self.params.height / 8)
            encoder_input = self.top[:, crop_height:-crop_height, :, :]
        else:
            encoder_input = self.top
        output1, output2, output3, output4 = encoder(encoder_input)
        outputs = [output1, output2, output3, output4]
        if cropping:
            outputs = [restore(output, self.params.height) for output in outputs]

        output_mode = self.params.output_mode
        if output_mode == "indirect":
            self.outputs = [self.equirectangular_disparity_to_depth(output) for output in outputs]
        elif output_mode == "direct":
            self.outputs = outputs
        elif output_mode == "attenuate":
            attenuated = []
            for output in outputs:
                # Channel 0 is attenuated as "top", channel 1 as "bottom".
                top_part = self.attenuate_equirectangular(tf.expand_dims(output[:, :, :, 0], 3), "top")
                bottom_part = self.attenuate_equirectangular(tf.expand_dims(output[:, :, :, 1], 3), "bottom")
                attenuated.append(tf.concat([top_part, bottom_part], 3))
            self.outputs = attenuated
def _generator(self, z, is_training):
    """Build the generator described by ``self.arch['generator']``.

    The config supplies 'hwc', 'output', 'kernel', 'stride' and 'l2-reg'.
    ``z`` passes through a batch-normalised fully connected layer, is
    reshaped to ``[-1, h, w, c]``, then through one transposed convolution
    per entry of 'output'.

    Args:
        z: Latent input tensor.
        is_training: Forwarded to ``slim.batch_norm``.

    Returns:
        Output of the last transposed convolution (tanh, no batch norm).
    """
    subnet = self.arch['generator']
    n_layer = len(subnet['output'])
    h, w, c = subnet['hwc']

    bn_defaults = dict(
        scale=True,
        updates_collections=None,
        decay=0.9, epsilon=1e-5,
        is_training=is_training,
        scope='BN',
    )
    with slim.arg_scope([slim.batch_norm], **bn_defaults):
        x = slim.fully_connected(
            z, h * w * c,
            normalizer_fn=slim.batch_norm,
            activation_fn=tf.nn.relu)
        x = tf.reshape(x, [-1, h, w, c])

        with slim.arg_scope(
                [slim.conv2d_transpose],
                weights_regularizer=slim.l2_regularizer(subnet['l2-reg']),
                normalizer_fn=slim.batch_norm,
                activation_fn=tf.nn.relu):
            # All layers but the last take the scope defaults (BN + ReLU).
            for idx in range(n_layer - 1):
                x = slim.conv2d_transpose(
                    x,
                    subnet['output'][idx],
                    subnet['kernel'][idx],
                    subnet['stride'][idx])

            # Don't apply BN for the last layer of G
            x = slim.conv2d_transpose(
                x,
                subnet['output'][-1],
                subnet['kernel'][-1],
                subnet['stride'][-1],
                normalizer_fn=None,
                activation_fn=tf.nn.tanh)
    return x
def deconv(self, x, num_out_layers, kernel_size, scale):
    """Pad, apply a transposed convolution, then crop the borders.

    Args:
        x: Input tensor indexed as a 4-D tensor by the padding spec.
        num_out_layers: Number of output channels.
        kernel_size: Kernel size of the transposed convolution.
        scale: Passed positionally as the stride argument.

    Returns:
        The transposed-convolution output with the first 3 and last 1
        rows/columns removed on axes 1 and 2.
    """
    # One element of padding before and after on axes 1 and 2 only.
    border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, border)
    upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size, scale, 'SAME')
    cropped = upsampled[:,3:-1,3:-1,:]
    return cropped
WhatWhereAutoencoder.py 文件源码
项目:Tensorflow_WhatWhereAutoencoder
作者: yselivonchyk
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def build_mnist_model(self, input, use_unpooling):
    """
    Build the MNIST autoencoder described in the Stacked What-Where
    autoencoders paper.

    :param input: 4D tensor of source data of shape [batch_size, w, h, channels]
    :param use_unpooling: whether an unpooling layer (driven by the pooling
        argmax mask) is used instead of naive upsampling
    :return: tuple of tensors:
      train - train operation (Adam minimising the L2 reconstruction loss)
      encode - bottleneck tensor of the autoencoder network
      decode - reconstruction of the input
    """
    pool_size = FLAGS.pool_size

    # Encoder. (16)5c-(32)3c-Xp
    features = slim.conv2d(input, 16, [5, 5])
    features = slim.conv2d(features, 32, [3, 3])

    if use_unpooling:
        # What-where path: remember the argmax positions and restore them.
        encode, mask = max_pool_with_argmax(features, pool_size)
        expanded = unpool(encode, mask, stride=pool_size)
    else:
        encode = slim.max_pool2d(features, kernel_size=[pool_size, pool_size], stride=pool_size)
        expanded = upsample(encode, stride=pool_size)

    # Decoder
    decode = slim.conv2d_transpose(expanded, 16, [3, 3])
    decode = slim.conv2d_transpose(decode, 1, [5, 5])

    reconstruction_error = slim.flatten(input) - slim.flatten(decode)
    loss_l2 = tf.nn.l2_loss(reconstruction_error)

    # Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    train = optimizer.minimize(loss_l2)
    return train, encode, decode
def build_decoder(net, layer_config, i=None, reuse=False, masks=None):
    """Recursively build decoder layers from ``layer_config[i]`` down to 0.

    Each call handles one config entry, records the result on it
    (``dout``, and ``dec_op_name`` when not reusing), then recurses with
    ``i - 1``. Recursion stops at an INPUT entry or when ``i`` is negative.

    Args:
        net: Tensor entering the current decoder layer.
        layer_config: Sequence of layer configuration objects.
        i: Index of the entry to decode; defaults to the last entry.
        reuse: When true, reuse the scope name stored in ``dec_op_name``.
        masks: Optional list of unpooling masks, popped when a POOL_ARG
            entry has no ``argmax`` of its own.

    Returns:
        The tensor after all remaining decoder layers have been applied.
    """
    if i is None:
        i = len(layer_config) - 1

    cfg = layer_config[i]
    name = cfg.dec_op_name if reuse else None

    # Reshape when the rank differs from the shape recorded on the
    # following config entry.
    if len(layer_config) > i + 1:
        following_shape = layer_config[i + 1].shape
        if len(following_shape) != len(net.get_shape().as_list()):
            net = tf.reshape(net, following_shape)

    if i < 0 or cfg.type == INPUT:
        return net

    if cfg.type == FC:
        net = slim.fully_connected(net, int(np.prod(cfg.shape[1:])), scope=name,
                                   activation_fn=cfg.activation, reuse=reuse)
    elif cfg.type == CONV:
        net = slim.conv2d_transpose(net, cfg.shape[-1], [cfg.kernel, cfg.kernel], stride=cfg.stride,
                                    activation_fn=cfg.activation, padding=PADDING,
                                    scope=name, reuse=reuse)
    elif cfg.type == POOL_ARG:
        if cfg.argmax is not None or masks is not None:
            # Prefer the mask stored on the config; otherwise take the next
            # one from the supplied list.
            mask = cfg.argmax if cfg.argmax is not None else masks.pop()
            net = nut.unpool(net, mask=mask, stride=cfg.kernel)
        else:
            net = nut.upsample(net, stride=cfg.kernel, mode='COPY')
    elif cfg.type == POOL:
        net = nut.upsample(net, cfg.kernel)
    elif cfg.type == DO:
        pass
    elif cfg.type == LOSS:
        cfg.arg2 = net
    elif cfg.type == INPUT:
        assert False

    if not reuse:
        cfg.dec_op_name = net.name.split('/')[0]
        ut.print_info('\rdecoder_%d \t%s' % (i, str(net)), color=CONFIG_COLOR)
    cfg.dout = net
    return build_decoder(net, layer_config, i - 1, reuse=reuse, masks=masks)
def deconv2d(input_, output_dim, ks=4, s=2, stddev=0.02, name="deconv2d"):
    """Bias-free, activation-free transposed convolution in its own scope.

    Args:
        input_: Input tensor.
        output_dim: Number of output channels.
        ks: Kernel size.
        s: Stride.
        stddev: Standard deviation of the truncated-normal weight initializer.
        name: Name of the enclosing variable scope.

    Returns:
        The output tensor of ``slim.conv2d_transpose``.
    """
    with tf.variable_scope(name):
        kernel_init = tf.truncated_normal_initializer(stddev=stddev)
        upsampled = slim.conv2d_transpose(
            input_, output_dim, ks, s,
            padding='SAME',
            activation_fn=None,
            weights_initializer=kernel_init,
            biases_initializer=None)
        return upsampled
def discriminator(input_images, reuse=False):
    """Build an autoencoder-style discriminator.

    Three stride-2 4x4 convolutions encode the images; three stride-2 4x4
    transposed convolutions decode them. Reads the module-level names
    ``train_phase`` and ``IMAGE_SIZE``.

    Args:
        input_images: Image tensor fed to the first convolution.
        reuse: Forwarded as ``reuse`` to every slim layer.

    Returns:
        Tuple ``(encode, decoded)``: the flattened code and the
        tanh-activated reconstruction.
    """
    bn_defaults = dict(
        is_training=train_phase, reuse=reuse, decay=0.9, epsilon=1e-5,
        param_initializers={
            "beta": tf.constant_initializer(value=0),
            "gamma": tf.random_normal_initializer(mean=1, stddev=0.045)
        },
    )
    conv_defaults = dict(
        weights_initializer=tf.truncated_normal_initializer(stddev=0.045),
        biases_initializer=tf.constant_initializer(value=0),
        activation_fn=None, reuse=reuse,
    )
    # (channels, layer scope, batch-norm scope, leaky-relu op name)
    encoder_stages = (
        (32, "Discriminator/conv_1", "Discriminator/bn_1", 'Discriminator/leaky_relu_1'),
        (64, "Discriminator/conv_2", "Discriminator/bn_2", 'Discriminator/leaky_relu_2'),
        (128, "Discriminator/conv_3", "Discriminator/bn_3", 'Discriminator/leaky_relu_3'),
    )
    decoder_stages = (
        (64, "Discriminator/deconv_4", "Discriminator/bn_4", "Discriminator/leaky_relu_4"),
        (32, "Discriminator/deconv_5", "Discriminator/bn_5", "Discriminator/leaky_relu_5"),
    )

    with slim.arg_scope([slim.batch_norm], **bn_defaults), \
            slim.arg_scope([slim.conv2d, slim.conv2d_transpose], **conv_defaults):
        # Encoder
        hidden = input_images
        for channels, conv_scope, bn_scope, relu_name in encoder_stages:
            hidden = slim.conv2d(inputs=hidden,
                                 num_outputs=channels,
                                 kernel_size=[4, 4],
                                 stride=2,
                                 padding='SAME',
                                 scope=conv_scope)
            normed = slim.batch_norm(inputs=hidden, scope=bn_scope)
            # Leaky ReLU with slope 0.2, written as max(0.2 * x, x).
            hidden = tf.maximum(0.2 * normed, normed, relu_name)
        encode = tf.reshape(hidden, [-1, 2 * IMAGE_SIZE * IMAGE_SIZE], name="Discriminator/encode")

        # Decoder
        hidden = tf.reshape(encode, [-1, IMAGE_SIZE // 8, IMAGE_SIZE // 8, 128], name="Discriminator/encode_reshape")
        for channels, deconv_scope, bn_scope, relu_name in decoder_stages:
            hidden = slim.conv2d_transpose(inputs=hidden, num_outputs=channels, kernel_size=[4, 4], stride=2,
                                           padding='SAME', scope=deconv_scope)
            hidden = slim.batch_norm(hidden, scope=bn_scope)
            hidden = tf.maximum(0.2 * hidden, hidden, name=relu_name)

        # The output layer has no batch norm.
        out_6 = slim.conv2d_transpose(inputs=hidden, num_outputs=3, kernel_size=[4, 4], stride=2,
                                      padding='SAME', scope="Discriminator/deconv_6")
        decoded = tf.nn.tanh(out_6, name="Discriminator/tanh_6")
    return encode, decoded
# mean squared errors
def _generator(self, z, y, is_training):
    """Build the generator described by ``self.arch['generator']``.

    NOTE(review): an earlier docstring called ``y`` useless, but it is passed
    to ``self._merge`` together with ``z`` -- confirm which is intended.

    The config supplies 'hwc', 'output', 'kernel', 'stride', 'l2-reg' and
    'merge_dim'.

    Args:
        z: Latent input tensor.
        y: Second input merged with ``z`` via ``self._merge``.
        is_training: Forwarded to ``slim.batch_norm``.

    Returns:
        Tuple ``(tf.nn.sigmoid(logit), logit)`` where ``logit`` is the output
        of the last transposed convolution (no batch norm, no activation).
    """
    subnet = self.arch['generator']
    n_layer = len(subnet['output'])
    h, w, c = subnet['hwc']

    merged = self._merge([z, y], subnet['merge_dim'])
    x = lrelu(merged)

    bn_defaults = dict(
        scale=True, scope='BN',
        updates_collections=None,
        is_training=is_training,
    )
    with slim.arg_scope([slim.batch_norm], **bn_defaults):
        x = slim.fully_connected(
            x, h * w * c,
            normalizer_fn=slim.batch_norm,
            activation_fn=lrelu)
        x = tf.reshape(x, [-1, h, w, c])

        with slim.arg_scope(
                [slim.conv2d_transpose],
                weights_regularizer=slim.l2_regularizer(subnet['l2-reg']),
                normalizer_fn=slim.batch_norm,
                activation_fn=lrelu):
            # All layers but the last take the scope defaults (BN + lrelu).
            for idx in range(n_layer - 1):
                x = slim.conv2d_transpose(
                    x,
                    subnet['output'][idx],
                    subnet['kernel'][idx],
                    subnet['stride'][idx])

            # Don't apply BN for the last layer of G
            logit = slim.conv2d_transpose(
                x,
                subnet['output'][-1],
                subnet['kernel'][-1],
                subnet['stride'][-1],
                normalizer_fn=None,
                activation_fn=None)

    return tf.nn.sigmoid(logit), logit
def _generator(self, z, y, is_training):
    """Build the generator described by ``self.arch['generator']``.

    NOTE(review): an earlier docstring called ``y`` useless, but it is passed
    to ``self._merge`` together with ``z`` -- confirm which is intended.

    The config supplies 'hwc', 'output', 'kernel', 'stride', 'l2-reg' and
    'merge_dim'.

    Args:
        z: Latent input tensor.
        y: Second input merged with ``z`` via ``self._merge``.
        is_training: Forwarded to ``slim.batch_norm``.

    Returns:
        Tuple ``(tf.nn.sigmoid(logit), logit)`` where ``logit`` is the output
        of the last transposed convolution (no batch norm, no activation).
    """
    subnet = self.arch['generator']
    n_layer = len(subnet['output'])
    h, w, c = subnet['hwc']

    merged = self._merge([z, y], subnet['merge_dim'])
    x = lrelu(merged)

    bn_defaults = dict(
        scale=True, scope='BN',
        updates_collections=None,
        is_training=is_training,
    )
    with slim.arg_scope([slim.batch_norm], **bn_defaults):
        x = slim.fully_connected(
            x, h * w * c,
            normalizer_fn=slim.batch_norm,
            activation_fn=lrelu)
        x = tf.reshape(x, [-1, h, w, c])

        with slim.arg_scope(
                [slim.conv2d_transpose],
                weights_regularizer=slim.l2_regularizer(subnet['l2-reg']),
                normalizer_fn=slim.batch_norm,
                activation_fn=lrelu):
            # All layers but the last take the scope defaults (BN + lrelu).
            for idx in range(n_layer - 1):
                x = slim.conv2d_transpose(
                    x,
                    subnet['output'][idx],
                    subnet['kernel'][idx],
                    subnet['stride'][idx])

            # Don't apply BN for the last layer of G
            logit = slim.conv2d_transpose(
                x,
                subnet['output'][-1],
                subnet['kernel'][-1],
                subnet['stride'][-1],
                normalizer_fn=None,
                activation_fn=None)

    return tf.nn.sigmoid(logit), logit