def mdclW(num_filters,num_channels,filter_size,winit,name,scales):
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Total filter size
    size = filter_size + (filter_size-1)*(scales[-1]-1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters,num_channels,size,size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters,num_channels,filter_size,filter_size))),name=name+'.W')
    for scale in scales[::-1]: # iterate over the scales backwards so that we place the main filter on top
        W = T.set_subtensor(W[:,:,scales[-1]-scale:size-scales[-1]+scale:scale,scales[-1]-scale:size-scales[-1]+scale:scale],
                            baseW*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'.coeff_'+str(scale)).dimshuffle(0,'x','x','x'))
    return W
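# Usage sketch (not from the original source): the composite multiscale filter returned by mdclW
# can be handed to an ordinary convolution layer as its W parameter. This assumes the same imports
# as the surrounding snippets (theano, theano.tensor as T, lasagne) and a Lasagne version that
# accepts Theano expressions as parameter specs, which the MDCL block further down already relies
# on. With filter_size=3 and scales=[1, 2], the composite filter is 5x5.
inp_sketch = lasagne.layers.InputLayer((None, 32, 64, 64))
W_md = mdclW(num_filters=64, num_channels=32, filter_size=3,
             winit=lasagne.init.Normal(0.02), name='mdcl_sketch', scales=[1, 2])
conv_sketch = lasagne.layers.Conv2DLayer(inp_sketch, num_filters=64, filter_size=5, pad='same',
                                         W=W_md, b=None, nonlinearity=None)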
# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a subpixel-layer upscaling style
# as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
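# A minimal sketch of the reorganization described above (not the original implementation),
# assuming theano.tensor as T and lasagne are imported as in the rest of this file: the input
# carries c*r^2 channels, and each group of c channels fills one (y, x) phase of the
# r-times-larger output grid.
class SubpixelUpsampleSketch(lasagne.layers.Layer):
    def __init__(self, incoming, r, c, **kwargs):
        super(SubpixelUpsampleSketch, self).__init__(incoming, **kwargs)
        self.r = r  # upscale factor
        self.c = c  # number of output channels

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.c,
                None if input_shape[2] is None else self.r * input_shape[2],
                None if input_shape[3] is None else self.r * input_shape[3])

    def get_output_for(self, input, **kwargs):
        # r^2 set_subtensor calls, one per phase of the upscaled output grid
        out = T.zeros((input.shape[0], self.c, self.r * input.shape[2], self.r * input.shape[3]))
        for x in range(self.r):
            for y in range(self.r):
                out = T.set_subtensor(out[:, :, y::self.r, x::self.r],
                                      input[:, self.r * x + y::self.r * self.r, :, :])
        return out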
def get_output_for(self, input, init=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.tensordot(input, self.W, [[1], [0]])
    abs_dif = (T.sum(abs(activation.dimshuffle(0,1,2,'x') - activation.dimshuffle('x',1,2,0)),axis=2)
               + 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1))
    if init:
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2),axis=0)
        abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
        self.init_updates = [(self.log_weight_scale, self.log_weight_scale-T.log(mean_min_abs_dif).dimshuffle(0,'x'))]
    f = T.sum(T.exp(-abs_dif),axis=2)
    if init:
        mf = T.mean(f,axis=0)
        f -= mf.dimshuffle('x',0)
        self.init_updates.append((self.b, -mf))
    else:
        f += self.b.dimshuffle('x',0)
    return T.concatenate([input, f], axis=1)
def l2normalize(layer, train_scale=True):
    W_param = layer.W
    s = W_param.get_value().shape
    if len(s)==4:
        axes_to_sum = (1,2,3)
        dimshuffle_args = [0,'x','x','x']
        k = s[0]
    else:
        axes_to_sum = 0
        dimshuffle_args = ['x',0]
        k = s[1]
    layer.W_scale = layer.add_param(lasagne.init.Constant(1.),
                                    (k,), name="W_scale", trainable=train_scale, regularizable=False)
    layer.W = W_param * (layer.W_scale/T.sqrt(1e-6 + T.sum(T.square(W_param),axis=axes_to_sum))).dimshuffle(*dimshuffle_args)
    return layer
# fully connected layer with weight normalization
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input, self.W)
    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x',0)
        stdv = T.sqrt(T.mean(T.square(activation),axis=0))
        activation /= stdv.dimshuffle('x',0)
        self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
# comes from Ishamel code base
def __init__(self, incoming, RMAX, DMAX, axes='auto', epsilon=1e-4, alpha=0.1,
             beta=lasagne.init.Constant(0), gamma=lasagne.init.Constant(1),
             mean=lasagne.init.Constant(0), inv_std=lasagne.init.Constant(1), **kwargs):
    super(BatchReNormDNNLayer, self).__init__(
        incoming, axes, epsilon, alpha, beta, gamma, mean, inv_std,
        **kwargs)
    all_but_second_axis = (0,) + tuple(range(2, len(self.input_shape)))
    self.RMAX, self.DMAX = RMAX, DMAX
    if self.axes not in ((0,), all_but_second_axis):
        raise ValueError("BatchNormDNNLayer only supports normalization "
                         "across the first axis, or across all but the "
                         "second axis, got axes=%r" % (axes,))
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
             crop=0, untie_biases=False,
             W=initmethod(), b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
             **kwargs):
    super(DeconvLayer, self).__init__(
        incoming, num_filters, filter_size, stride, crop, untie_biases,
        W, b, nonlinearity, flip_filters, n=2, **kwargs)
    # expose self.pad under the name self.crop
    self.crop = self.pad
    del self.pad
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
    super(MinibatchLayer, self).__init__(incoming, **kwargs)
    self.num_kernels = num_kernels
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
    self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
    self.W = self.theta * (T.exp(self.log_weight_scale)/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0,1)
    self.b = self.add_param(b, (num_kernels,), name="b")
def __init__(self, incoming, num_units, mask_generator, layerIdx, W=lasagne.init.GlorotUniform(),
             b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
    super(MaskedLayer, self).__init__(incoming, num_units, W, b, nonlinearity, **kwargs)
    self.mask_generator = mask_generator
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.weights_mask = self.add_param(spec=np.ones((num_inputs, num_units), dtype=np.float32),
                                       shape=(num_inputs, num_units),
                                       name='weights_mask',
                                       trainable=False,
                                       regularizable=False)
    self.layerIdx = layerIdx
    self.shuffle_update = [(self.weights_mask, mask_generator.get_mask_layer_UPDATE(self.layerIdx))]
def __init__(self, incoming, num_units, mask_generator, layerIdx, W=lasagne.init.GlorotUniform(),
             b=lasagne.init.Constant(0.), nonlinearity=None, **kwargs):
    super(DIML, self).__init__(incoming, num_units, W, b, nonlinearity, **kwargs)
    self.mask_generator = mask_generator
    self.layerIdx = layerIdx
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.weights_mask = self.add_param(spec=np.ones((num_inputs, num_units), dtype=np.float32),
                                       shape=(num_inputs, num_units),
                                       name='weights_mask',
                                       trainable=False,
                                       regularizable=False)
    self.shuffle_update = [(self.weights_mask, self.mask_generator.get_direct_input_mask_layer_UPDATE(self.layerIdx + 1))]
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        input = input.flatten(2)
    activation = T.dot(input, self.W*self.weights_mask)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
# Conditioning Masked Layer
# Currently not used.
# class CML(MaskedLayer):
#     def __init__(self, incoming, num_units, mask_generator, use_cond_mask=False, U=lasagne.init.GlorotUniform(), W=lasagne.init.GlorotUniform(),
#                  b=init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
#         super(CML, self).__init__(incoming, num_units, mask_generator, W,
#                                   b, nonlinearity, **kwargs)
#         self.use_cond_mask = use_cond_mask
#         if use_cond_mask:
#             self.U = self.add_param(spec=U,
#                                     shape=(num_inputs, num_units),
#                                     name='U',
#                                     trainable=True,
#                                     regularizable=False)theano.shared(value=self.weights_initialization((self.n_in, self.n_out)), name=self.name+'U', borrow=True)
#             self.add_param(self.U,name =
#     def get_output_for(self, input, **kwargs):
#         lin = self.lin_output = T.dot(input, self.W * self.weights_mask) + self.b
#         if self.use_cond_mask:
#             lin = lin + T.dot(T.ones_like(input), self.U * self.weights_mask)
#         return lin if self._activation is None else self._activation(lin)
# MADE layer, adapted from M. Germain
def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.),
             W=lasagne.init.Normal(0.05), train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
    super(WeightNormLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = nonlinearity
    self.init_stdv = init_stdv
    k = self.input_shape[1]
    if b is not None:
        self.b = self.add_param(b, (k,), name="b", regularizable=False)
    if g is not None:
        self.g = self.add_param(g, (k,), name="g", regularizable=False, trainable=train_g)
    if len(self.input_shape)==4:
        self.axes_to_sum = (0,2,3)
        self.dimshuffle_args = ['x',0,'x','x']
    else:
        self.axes_to_sum = 0
        self.dimshuffle_args = ['x',0]
    # scale weights in layer below
    incoming.W_param = incoming.W
    #incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
    if incoming.W_param.ndim==4:
        if isinstance(incoming, Deconv2DLayer):
            W_axes_to_sum = (0,2,3)
            W_dimshuffle_args = ['x',0,'x','x']
        else:
            W_axes_to_sum = (1,2,3)
            W_dimshuffle_args = [0,'x','x','x']
    else:
        W_axes_to_sum = 0
        W_dimshuffle_args = ['x',0]
    if g is not None:
        incoming.W = incoming.W_param * (self.g/T.sqrt(1e-6 + T.sum(T.square(incoming.W_param),axis=W_axes_to_sum))).dimshuffle(*W_dimshuffle_args)
    else:
        incoming.W = incoming.W_param / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param),axis=W_axes_to_sum,keepdims=True))
def get_output_for(self, input, init=False, **kwargs):
    if init:
        m = T.mean(input, self.axes_to_sum)
        input -= m.dimshuffle(*self.dimshuffle_args)
        inv_stdv = self.init_stdv/T.sqrt(T.mean(T.square(input), self.axes_to_sum))
        input *= inv_stdv.dimshuffle(*self.dimshuffle_args)
        self.init_updates = [(self.b, -m*inv_stdv), (self.g, self.g*inv_stdv)]
    elif hasattr(self,'b'):
        input += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(input)
def __init__(self, incoming, target_shape, filter_size, stride=(2, 2), pad='half',
             W=lasagne.init.Normal(0.05), b=lasagne.init.Constant(0.), nonlinearity=relu, **kwargs):
    super(Deconv2DLayer, self).__init__(incoming, **kwargs)
    self.target_shape = target_shape
    self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
    self.filter_size = lasagne.layers.dnn.as_tuple(filter_size, 2)
    self.stride = lasagne.layers.dnn.as_tuple(stride, 2)
    self.pad = pad
    self.W_shape = (incoming.output_shape[1], target_shape[1], filter_size[0], filter_size[1])
    self.W = self.add_param(W, self.W_shape, name="W")
    if b is not None:
        self.b = self.add_param(b, (target_shape[1],), name="b")
    else:
        self.b = None
def batch_norm(layer, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.), **kwargs):
    """
    adapted from https://gist.github.com/f0k/f1a6bd3c8585c400c190
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = lasagne.nonlinearities.identity
    else:
        nonlinearity = lasagne.nonlinearities.identity
    if hasattr(layer, 'b'):
        del layer.params[layer.b]
        layer.b = None
    return BatchNormLayer(layer, b, g, nonlinearity=nonlinearity, **kwargs)
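# Usage sketch (not from the original source): batch_norm is a wrapper that strips the wrapped
# layer's bias, swaps its nonlinearity for the identity, and re-applies that nonlinearity on top
# of the BatchNormLayer it returns (BatchNormLayer is defined elsewhere in this file).
bn_in = lasagne.layers.InputLayer((None, 3, 32, 32))
bn_conv = batch_norm(lasagne.layers.Conv2DLayer(bn_in, num_filters=96, filter_size=(3, 3), pad=1,
                                                nonlinearity=lasagne.nonlinearities.rectify))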
def __init__(self, incoming, num_units, theta=lasagne.init.Normal(0.1), b=lasagne.init.Constant(0.),
             weight_scale=lasagne.init.Constant(1.), train_scale=False, nonlinearity=relu, **kwargs):
    super(DenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
    self.num_units = num_units
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_units), name="theta")
    self.weight_scale = self.add_param(weight_scale, (num_units,), name="weight_scale", trainable=train_scale)
    self.W = self.theta * (self.weight_scale/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0)
    self.b = self.add_param(b, (num_units,), name="b")
def MDCL(incoming, num_filters, scales, name, dnn=True):
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D
    else:
        # fall back to the standard convolution layer when cuDNN is unavailable
        C2D = lasagne.layers.Conv2DLayer
    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)
    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Number of incoming channels
    ni = lasagne.layers.get_output_shape(incoming)[1]
    # Weight parameter--the primary parameter for this block
    W = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, ni, 3, 3))), name=name+'W')
    # Primary Convolution Layer--No Dilation
    n = C2D(incoming=incoming,
            num_filters=num_filters,
            filter_size=[3, 3],
            stride=[1, 1],
            pad=(1, 1),
            W=W*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_base').dimshuffle(0, 'x', 'x', 'x'),  # Note the broadcasting dimshuffle for the num_filter scalars.
            b=None,
            nonlinearity=None,
            name=name+'base')
    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for i, scale in enumerate(scales):
        # I don't think 0 dilation is technically defined (or if it is it's just the regular filter) but I use it here as a convenient keyword to grab the 1x1 mean conv.
        if scale == 0:
            nd.append(C2D(incoming=incoming,
                          num_filters=num_filters,
                          filter_size=[1, 1],
                          stride=[1, 1],
                          pad=(0, 0),
                          W=T.mean(W, axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_1x1').dimshuffle(0, 'x', 'x', 'x'),
                          b=None,
                          nonlinearity=None,
                          name=name+str(scale)))
        # Note the dimshuffles in this layer--these are critical as the current DilatedConv2D implementation uses a backward pass.
        else:
            nd.append(lasagne.layers.DilatedConv2DLayer(incoming=lasagne.layers.PadLayer(incoming=incoming, width=(scale, scale)),
                                                        num_filters=num_filters,
                                                        filter_size=[3, 3],
                                                        dilation=(scale, scale),
                                                        W=W.dimshuffle(1, 0, 2, 3)*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_'+str(scale)).dimshuffle('x', 0, 'x', 'x'),
                                                        b=None,
                                                        nonlinearity=None,
                                                        name=name+str(scale)))
    return ESL(nd+[n])
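# Usage sketch (not from the original source): MDCL returns an elementwise sum (ESL) of the base
# 3x3 convolution and its dilated/1x1 siblings, all tied to the same underlying filter W. ESL and
# initmethod are helpers defined elsewhere in this file; a scale of 0 requests the 1x1 mean conv.
mdcl_in = lasagne.layers.InputLayer((None, 64, 32, 32))
mdcl_block = MDCL(mdcl_in, num_filters=128, scales=[0, 2, 3], name='mdcl_sketch')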
# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T
    import nn

    theano_rng = MRG_RandomStreams(rs.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2 ** 15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)
    ls = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    ls.append(nn.batch_norm(
        ll.DenseLayer(ls[-1], num_units=4*4*512, W=Normal(0.05),
                      nonlinearity=nn.relu),
        g=None))
    ls.append(ll.ReshapeLayer(ls[-1], (batch_size,512,4,4)))
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size,256,8,8), (5,5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None)) # 4 -> 8
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size,128,16,16), (5,5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None)) # 8 -> 16
    ls.append(nn.weight_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size,3,32,32), (5,5), W=Normal(0.05),
                         nonlinearity=T.tanh),
        train_g=True, init_stdv=0.1)) # 16 -> 32
    gen_dat = ll.get_output(ls[-1])

    with np.load(params_file) as d:
        params = [d['arr_{}'.format(i)] for i in range(9)]
    ll.set_all_param_values(ls[-1], params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=gen_dat)
    samps = []
    while len(samps) < n:
        samps.extend(sample_batch())
    samps = np.array(samps[:n])
    return samps
def get_discriminator(self):
    ''' specify discriminator D0 '''
    """
    disc0_layers = [LL.InputLayer(shape=(self.args.batch_size, 3, 32, 32))]
    disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
    disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 16x16
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu)))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 8x8
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=0, W=Normal(0.02), nonlinearity=nn.lrelu))) # 6x6
    disc0_layer_shared = LL.NINLayer(disc0_layers[-1], num_units=192, W=Normal(0.02), nonlinearity=nn.lrelu) # 6x6
    disc0_layers.append(disc0_layer_shared)
    disc0_layer_z_recon = LL.DenseLayer(disc0_layer_shared, num_units=50, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_z_recon) # also need to recover z from x
    disc0_layers.append(LL.GlobalPoolLayer(disc0_layer_shared))
    disc0_layer_adv = LL.DenseLayer(disc0_layers[-1], num_units=10, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_adv)
    return disc0_layers, disc0_layer_adv, disc0_layer_z_recon
    """
    disc_x_layers = [LL.InputLayer(shape=(None, 3, 32, 32))]
    disc_x_layers.append(LL.GaussianNoiseLayer(disc_x_layers[-1], sigma=0.2))
    disc_x_layers.append(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3,3), pad=1, W=Normal(0.01), nonlinearity=nn.lrelu))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3,3), pad=1, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3,3), pad=1, stride=2, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3,3), pad=0, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers_shared = LL.NINLayer(disc_x_layers[-1], num_units=192, W=Normal(0.01), nonlinearity=nn.lrelu)
    disc_x_layers.append(disc_x_layers_shared)
    disc_x_layer_z_recon = LL.DenseLayer(disc_x_layers_shared, num_units=self.args.z0dim, nonlinearity=None)
    disc_x_layers.append(disc_x_layer_z_recon) # also need to recover z from x
    # disc_x_layers.append(nn.MinibatchLayer(disc_x_layers_shared, num_kernels=100))
    disc_x_layers.append(LL.GlobalPoolLayer(disc_x_layers_shared))
    disc_x_layer_adv = LL.DenseLayer(disc_x_layers[-1], num_units=10, W=Normal(0.01), nonlinearity=None)
    disc_x_layers.append(disc_x_layer_adv)
    #output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
    #output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)
    # temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
    # temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
    # init_updates = [u for l in LL.get_all_layers(gen_x_layers)+LL.get_all_layers(disc_x_layers) for u in getattr(l,'init_updates',[])]
    return disc_x_layers, disc_x_layer_adv, disc_x_layer_z_recon
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }
    wn_defs = {
        'momentum': .999
    }
    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)
    return net
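# Usage sketch (not from the original source): compiling a deterministic prediction function for
# the network above, assuming theano, theano.tensor as T, numpy as np and lasagne are imported as
# in the rest of this file.
x_sketch = T.tensor4('x')
net_sketch = build_network()
probs_sketch = lasagne.layers.get_output(net_sketch, x_sketch, deterministic=True)
predict_fn = theano.function([x_sketch], probs_sketch)
preds = predict_fn(np.zeros((2, 3, 32, 32), dtype=np.float32)).argmax(axis=1)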