# NOTE: these snippets assume TensorFlow 1.x semantics and the usual
# module-level imports from their original repositories, e.g.:
#     import numpy as np
#     import tensorflow as tf
#     import ops   # project-local conv2d / deconv2d / batch_norm / lrelu / linear wrappers
def ali_deconv(self, opts, noise, is_training, reuse, keep_prob):
    output_shape = self._data.data_shape
    batch_size = tf.shape(noise)[0]
    noise_size = int(noise.get_shape()[1])
    data_height = output_shape[0]
    data_width = output_shape[1]
    data_channels = output_shape[2]
    noise = tf.reshape(noise, [-1, 1, 1, noise_size])
    num_units = opts['g_num_filters']
    layer_params = []
    layer_params.append([4, 1, num_units])
    # Integer division (//) so channel counts stay ints under Python 3.
    layer_params.append([4, 2, num_units // 2])
    layer_params.append([4, 1, num_units // 4])
    layer_params.append([4, 2, num_units // 8])
    layer_params.append([5, 1, num_units // 8])
    # For convolution: (n - k) / stride + 1 = s
    # For transposed: (s - 1) * stride + k = n
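    # Worked example of the transposed-conv formula, assuming 32x32 data
    # (e.g. CIFAR-10): starting from the 1x1 noise map, the five layers give
    #     1 -> (1-1)*1+4 = 4 -> (4-1)*2+4 = 10 -> (10-1)*1+4 = 13
    #       -> (13-1)*2+4 = 28 -> (28-1)*1+5 = 32,
    # which is what the asserts after the loop verify against data_height.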
    layer_x = noise
    height = 1
    width = 1
    for i, (kernel, stride, channels) in enumerate(layer_params):
        height = (height - 1) * stride + kernel
        width = height
        layer_x = ops.deconv2d(
            opts, layer_x, [batch_size, height, width, channels], d_h=stride, d_w=stride,
            scope='h%d_deconv' % i, conv_filters_dim=kernel, padding='VALID')
        if opts['batch_norm']:
            layer_x = ops.batch_norm(opts, layer_x, is_training, reuse, scope='bn%d' % i)
        layer_x = ops.lrelu(layer_x, 0.1)
    assert height == data_height
    assert width == data_width
    # Then two 1x1 convolutions.
    layer_x = ops.conv2d(opts, layer_x, num_units // 8, d_h=1, d_w=1,
                         scope='conv2d_1x1', conv_filters_dim=1)
    if opts['batch_norm']:
        layer_x = ops.batch_norm(opts, layer_x, is_training, reuse, scope='bnlast')
    layer_x = ops.lrelu(layer_x, 0.1)
    layer_x = ops.conv2d(opts, layer_x, data_channels, d_h=1, d_w=1,
                         scope='conv2d_1x1_2', conv_filters_dim=1)
    # Map to [-1, 1] for symmetrically normalized inputs, else to [0, 1].
    if opts['input_normalize_sym']:
        return tf.nn.tanh(layer_x)
    else:
        return tf.nn.sigmoid(layer_x)
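
# A small standalone sketch (not part of the original code) that replays the
# shape arithmetic used above; handy for checking that a given layer_params
# stack reaches the target resolution before building the graph:
def deconv_output_size(layer_params, start=1):
    """Spatial size after a stack of [kernel, stride, channels] 'VALID'
    transposed convolutions, starting from a start x start feature map."""
    size = start
    for kernel, stride, _ in layer_params:  # channels do not affect the size
        size = (size - 1) * stride + kernel
    return size

# e.g. deconv_output_size([[4, 1, 0], [4, 2, 0], [4, 1, 0], [4, 2, 0], [5, 1, 0]]) == 32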
def ali_encoder(self, opts, input_, is_training=False, reuse=False, keep_prob=1.):
    num_units = opts['e_num_filters']
    layer_params = []
    layer_params.append([5, 1, num_units // 8])
    layer_params.append([4, 2, num_units // 4])
    layer_params.append([4, 1, num_units // 2])
    layer_params.append([4, 2, num_units])
    layer_params.append([4, 1, num_units * 2])
    # For convolution: (n - k) / stride + 1 = s
    # For transposed: (s - 1) * stride + k = n
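    # The mirror-image check of the generator, again assuming 32x32 inputs:
    #     32 -> (32-5)/1+1 = 28 -> (28-4)/2+1 = 13 -> (13-4)/1+1 = 10
    #        -> (10-4)/2+1 = 4 -> (4-4)/1+1 = 1,
    # so the stack ends at a 1x1 feature map, as the asserts below require.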
    layer_x = input_
    height = int(layer_x.get_shape()[1])
    width = int(layer_x.get_shape()[2])
    assert height == width
    for i, (kernel, stride, channels) in enumerate(layer_params):
        # Integer division keeps the running spatial size an int.
        height = (height - kernel) // stride + 1
        width = height
        layer_x = ops.conv2d(
            opts, layer_x, channels, d_h=stride, d_w=stride,
            scope='h%d_conv' % i, conv_filters_dim=kernel, padding='VALID')
        if opts['batch_norm']:
            layer_x = ops.batch_norm(opts, layer_x, is_training, reuse, scope='bn%d' % i)
        layer_x = ops.lrelu(layer_x, 0.1)
    assert height == 1
    assert width == 1
    # Then two 1x1 convolutions.
    layer_x = ops.conv2d(opts, layer_x, num_units * 2, d_h=1, d_w=1,
                         scope='conv2d_1x1', conv_filters_dim=1)
    if opts['batch_norm']:
        layer_x = ops.batch_norm(opts, layer_x, is_training, reuse, scope='bnlast')
    layer_x = ops.lrelu(layer_x, 0.1)
    layer_x = ops.conv2d(opts, layer_x, num_units // 2, d_h=1, d_w=1,
                         scope='conv2d_1x1_2', conv_filters_dim=1)
    if opts['e_is_random']:
        # Gaussian posterior: return mean and log-sigma heads.
        latent_mean = ops.linear(
            opts, layer_x, opts['latent_space_dim'], scope='hlast_lin')
        log_latent_sigmas = ops.linear(
            opts, layer_x, opts['latent_space_dim'], scope='hlast_lin_sigma')
        return latent_mean, log_latent_sigmas
    else:
        # Deterministic encoder: a single linear map to the latent space.
        return ops.linear(opts, layer_x, opts['latent_space_dim'], scope='hlast_lin')
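
# Usage sketch for the random encoder (an assumption about the sampling
# convention; the original repo may scale log_latent_sigmas differently):
#     mean, log_sigmas = self.ali_encoder(opts, images)
#     eps = tf.random_normal(tf.shape(mean))
#     z = mean + tf.exp(log_sigmas) * eps   # reparameterization trick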

def _recon_loss_using_disc_conv_eb(self, opts, reconstructed_training, real_points,
                                   is_training, keep_prob):
    """Build an additional loss using a discriminator in X space,
    following an energy-based approach."""

    def copy3D(height, width, channels):
        # Identity kernel: convolving the input with it copies each
        # height x width x channels patch into the output's channel dimension.
        m = np.zeros([height, width, channels, height, width, channels])
        for i in range(height):
            for j in range(width):
                for c in range(channels):
                    m[i, j, c, i, j, c] = 1.0
        return tf.constant(np.reshape(m, [height, width, channels, -1]), dtype=tf.float32)
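    # Note: at stride 1 with 'SAME' padding, convolving with copy3D(k, k, c) is
    # roughly equivalent to tf.extract_image_patches in TF 1.x, up to the
    # ordering of the flattened patch entries and the padding anchor.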
    def _architecture(inputs, reuse=None):
        dim = opts['adv_c_patches_size']
        height = int(inputs.get_shape()[1])
        width = int(inputs.get_shape()[2])
        channels = int(inputs.get_shape()[3])
        with tf.variable_scope('DISC_X_LOSS', reuse=reuse):
            num_units = opts['adv_c_num_units']
            num_layers = 1
            layer_x = inputs
            for i in range(num_layers):
                # scale = 2**(num_layers - i - 1)
                layer_x = ops.conv2d(opts, layer_x, num_units, d_h=1, d_w=1,
                                     scope='h%d_conv' % i,
                                     conv_filters_dim=dim, padding='SAME')
                # if opts['batch_norm']:
                #     layer_x = ops.batch_norm(opts, layer_x, is_training, reuse, scope='bn%d' % i)
                layer_x = ops.lrelu(layer_x, 0.1)  # tf.nn.relu(layer_x)
            copy_w = copy3D(dim, dim, channels)
            # Target: every dim x dim patch of the (gradient-stopped) input.
            duplicated = tf.nn.conv2d(inputs, copy_w, strides=[1, 1, 1, 1], padding='SAME')
            decoded = ops.conv2d(
                opts, layer_x, channels * dim * dim, d_h=1, d_w=1, scope="decoder",
                conv_filters_dim=1, padding='SAME')
            # Per-example patch reconstruction error (the "energy").
            reconstruction = tf.reduce_mean(tf.square(tf.stop_gradient(duplicated) - decoded), [1, 2, 3])
            assert len(reconstruction.get_shape()) == 1
            return flatten(layer_x), reconstruction

    reconstructed_embed_sg, adv_fake_layer = _architecture(tf.stop_gradient(reconstructed_training), reuse=None)
    reconstructed_embed, _ = _architecture(reconstructed_training, reuse=True)
    # The next line makes the forward value equal reconstructed_embed while the
    # backward pass leaves the discriminator variables untouched.
    crazy_hack = reconstructed_embed - reconstructed_embed_sg + tf.stop_gradient(reconstructed_embed_sg)
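    # Why this works: value(crazy_hack) == value(reconstructed_embed), while
    # grad(crazy_hack) = grad(reconstructed_embed) - grad(reconstructed_embed_sg);
    # the gradients w.r.t. the discriminator variables cancel, so only the path
    # into reconstructed_training (the generator) receives gradient.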
    real_p_embed_sg, adv_true_layer = _architecture(tf.stop_gradient(real_points), reuse=True)
    real_p_embed, _ = _architecture(real_points, reuse=True)
    adv_fake = tf.reduce_mean(adv_fake_layer)
    adv_true = tf.reduce_mean(adv_true_layer)
    # Energy-based objective: reconstruct real points well (low energy) and
    # reconstructions poorly (high energy).
    adv_c_loss = tf.log(adv_true) - tf.log(adv_fake)
    emb_c = tf.reduce_sum(tf.square(crazy_hack - tf.stop_gradient(real_p_embed)), 1)
    emb_c_loss = tf.reduce_mean(emb_c)
    return adv_c_loss, emb_c_loss

def build(self, is_train=True):
    n = self.a_dim
    conv_info = self.conv_info

    # build loss and accuracy {{{
    def build_loss(logits, labels):
        # Cross-entropy loss; the op expects unnormalized logits and
        # returns a per-example loss vector.
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        # Classification accuracy
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return tf.reduce_mean(loss), accuracy
    # }}}

    # Classifier: takes images and question encodings as input and outputs
    # answer logits [B, n]
    def C(img, q, scope='Classifier'):
        with tf.variable_scope(scope) as scope:
            log.warn(scope.name)
            conv_1 = conv2d(img, conv_info[0], is_train, s_h=3, s_w=3, name='conv_1')
            conv_2 = conv2d(conv_1, conv_info[1], is_train, s_h=3, s_w=3, name='conv_2')
            conv_3 = conv2d(conv_2, conv_info[2], is_train, name='conv_3')
            conv_4 = conv2d(conv_3, conv_info[3], is_train, name='conv_4')
            # Flatten the conv features and concatenate the question encoding.
            conv_q = tf.concat([tf.reshape(conv_4, [self.batch_size, -1]), q], axis=1)
            fc_1 = fc(conv_q, 256, name='fc_1')
            fc_2 = fc(fc_1, 256, name='fc_2')
            fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=is_train, scope='fc_3/')
            fc_3 = fc(fc_2, n, activation_fn=None, name='fc_3')
            return fc_3

    logits = C(self.img, self.q, scope='Classifier')
    self.all_preds = tf.nn.softmax(logits)
    self.loss, self.accuracy = build_loss(logits, self.a)

    # Add summaries
    def draw_iqa(img, q, target_a, pred_a):
        fig, ax = tfplot.subplots(figsize=(6, 6))
        ax.imshow(img)
        ax.set_title(question2str(q))
        ax.set_xlabel(answer2str(target_a) + answer2str(pred_a, 'Predicted'))
        return fig

    try:
        tfplot.summary.plot_many('IQA/',
                                 draw_iqa, [self.img, self.q, self.a, self.all_preds],
                                 max_outputs=3,
                                 collections=["plot_summaries"])
    except Exception:
        # Plot summaries are optional; skip them if figure drawing fails.
        pass

    tf.summary.scalar("loss/accuracy", self.accuracy)
    tf.summary.scalar("loss/cross_entropy", self.loss)
    log.warn('Successfully loaded the model.')