def network_generator(self, input_var, network_weights=None):
    # Input layer
    layers = []
    n_blocks = int(np.log2(self.input_size / 8)) + 1  # n_blocks-1 upsamplings take the initial 8x8 map back up to input_size
    layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))

    # Dense layer up (from h to n*8*8)
    layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters), name='generator/dense%d' % len(layers), network_weights=network_weights))
    layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8), name='generator/reshape%d' % len(layers)))

    # Convolutional blocks (decoder)
    for i_block in range(1, n_blocks + 1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
        if i_block != n_blocks:
            layers.append(Upscale2DLayer(layers[-1], scale_factor=2, name='generator/upsample%d' % len(layers)))

    # Final layer (sigmoid output keeps images in [0, 1]; use tanh instead if inputs are scaled to [-1, 1])
    layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=sigmoid))

    # Network in dictionary form
    network = {layer.name: layer for layer in layers}

    return network
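A hedged usage sketch (not part of the original source): the method returns a name-to-layer dictionary, so the generated images can be compiled with standard Lasagne/Theano calls; the `model` instance, its `hidden_size` attribute, and the batch size are assumptions.

import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as ll

z = T.matrix('z')                                     # latent codes, shape (batch, hidden_size)
network = model.network_generator(z)                  # hypothetical instance of the class above
gen_images = ll.get_output(network['generator/output'], deterministic=True)
generate = theano.function([z], gen_images)

samples = generate(np.random.randn(16, model.hidden_size).astype(theano.config.floatX))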
# def network_generator_alt(self, input_var, network_weights=None):
#
# # Input layer
# layers = []
# n_blocks = int(np.log2(self.input_size / 8)) + 1 # end up with 8x8 output
# layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))
#
# # Dense layer up (from h to n*8*8)
# layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters*n_blocks), name='generator/dense%d' % len(layers), network_weights=network_weights, nonlinearity=elu, bn=True))
# layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters*n_blocks, 8, 8), name='generator/reshape%d' % len(layers)))
#
# # Convolutional blocks (decoder)
# for i_block in range(1, n_blocks+1)[::-1]:
# # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
# # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
# if i_block != 1:
# layers.append(transposed_conv_layer(layers[-1], n_filters=self.n_filters*(i_block-1), stride=2, name='generator/upsample%d' % len(layers),
# output_size=8*2**(n_blocks-i_block+1), network_weights=network_weights, nonlinearity=elu, bn=True))
#
# # Final layer (make sure input images are in the range [-1, 1]
# layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=tanh, bn=False))
#
# # Network in dictionary form
# network = {layer.name: layer for layer in layers}
#
# return network
Example source code using the Python class ReshapeLayer()
def __init__(self, input_shape, output_dim, hidden_dim, hidden_nonlinearity=LN.rectify,
             output_nonlinearity=None, name=None, input_var=None, input_layer=None):
    if input_layer is None:
        l_in = L.InputLayer(shape=(None, None) + input_shape, input_var=input_var, name="input")
    else:
        l_in = input_layer
    l_step_input = L.InputLayer(shape=(None,) + input_shape)
    l_step_prev_hidden = L.InputLayer(shape=(None, hidden_dim))
    l_gru = GRULayer(l_in, num_units=hidden_dim, hidden_nonlinearity=hidden_nonlinearity,
                     hidden_init_trainable=False)
    l_gru_flat = L.ReshapeLayer(
        l_gru, shape=(-1, hidden_dim)
    )
    l_output_flat = L.DenseLayer(
        l_gru_flat,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
    )
    l_output = OpLayer(
        l_output_flat,
        op=lambda flat_output, l_input:
        flat_output.reshape((l_input.shape[0], l_input.shape[1], -1)),
        shape_op=lambda flat_output_shape, l_input_shape:
        (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
        extras=[l_in]
    )
    l_step_hidden = l_gru.get_step_layer(l_step_input, l_step_prev_hidden)
    l_step_output = L.DenseLayer(
        l_step_hidden,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        W=l_output_flat.W,
        b=l_output_flat.b,
    )

    self._l_in = l_in
    self._hid_init_param = l_gru.h0
    self._l_gru = l_gru
    self._l_out = l_output
    self._l_step_input = l_step_input
    self._l_step_prev_hidden = l_step_prev_hidden
    self._l_step_hidden = l_step_hidden
    self._l_step_output = l_step_output
def __init__(self, rng, rstream, x, y, setting):  # add cost
    """
    Constructing the mlp model.

    Arguments:
        rng, rstream - random streams
    """
    self.paramsEle = []
    self.paramsHyper = []
    self.layers = [ll.InputLayer((None, 3, 28, 28))]
    # ReshapeLayer shape specs use -1 or [i] references, not None
    self.layers.append(ll.ReshapeLayer(self.layers[-1], ([0], 3 * 28 * 28)))
    penalty = 0.
    for num in [1000, 1000, 1000, 10]:  # TODO: refactor it later
        self.layers.append(DenseLayerWithReg(setting, self.layers[-1], num_units=num))
        self.paramsEle += self.layers[-1].W
        self.paramsEle += self.layers[-1].b
        if setting.regL2 is not None:
            tempL2 = self.layers[-1].L2 * T.sqr(self.layers[-1].W)
            penalty += T.sum(tempL2)
            self.paramsHyper += self.layers[-1].L2

    self.y = self.layers[-1].output
    self.prediction = T.argmax(self.y, axis=1)
    self.penalty = penalty if penalty != 0. else T.constant(0.)

    def stable(x, stabilize=True):
        if stabilize:
            x = T.where(T.isnan(x), 1000., x)
            x = T.where(T.isinf(x), 1000., x)
        return x

    if setting.cost == 'categorical_crossentropy':
        def costFun1(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError

    def costFunT1(*args, **kwargs):
        return T.mean(costFun1(*args, **kwargs))

    # cost function
    self.trainCost = costFunT1(self.y, y)
    self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))  # self.guessLabel was undefined; use the argmax prediction
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T

    import nn

    theano_rng = MRG_RandomStreams(rs.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2 ** 15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)
    ls = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    ls.append(nn.batch_norm(
        ll.DenseLayer(ls[-1], num_units=4*4*512, W=Normal(0.05),
                      nonlinearity=nn.relu),
        g=None))
    ls.append(ll.ReshapeLayer(ls[-1], (batch_size, 512, 4, 4)))
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 256, 8, 8), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None))  # 4 -> 8
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 128, 16, 16), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None))  # 8 -> 16
    ls.append(nn.weight_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 3, 32, 32), (5, 5), W=Normal(0.05),
                         nonlinearity=T.tanh),
        train_g=True, init_stdv=0.1))  # 16 -> 32
    gen_dat = ll.get_output(ls[-1])

    with np.load(params_file) as d:
        params = [d['arr_{}'.format(i)] for i in range(9)]
    ll.set_all_param_values(ls[-1], params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=gen_dat)
    samps = []
    while len(samps) < n:
        samps.extend(sample_batch())
    samps = np.array(samps[:n])
    return samps
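For reference, a hedged call sketch (the parameter file path is a placeholder; the function expects the project's `nn` helper module and a NumPy archive holding the nine generator parameter arrays as arr_0 … arr_8):

import numpy as np

rs = np.random.RandomState(0)
samples = _sample_trained_minibatch_gan('gan_params.npz', n=1000, batch_size=100, rs=rs)
print(samples.shape)   # (1000, 3, 32, 32), tanh output roughly in [-1, 1]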
def get_generator(self, meanx, z0, y_1hot):
    ''' specify generator G0, gen_x = G0(z0, h1) '''
    """
    #z0 = theano_rng.uniform(size=(self.args.batch_size, 16)) # uniform noise
    gen0_layers = [LL.InputLayer(shape=(self.args.batch_size, 50), input_var=z0)] # Input layer for z0
    gen0_layers.append(nn.batch_norm(LL.DenseLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
                       num_units=128, W=Normal(0.02), nonlinearity=nn.relu))) # embedding, 50 -> 128
    gen0_layer_z_embed = gen0_layers[-1]

    #gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 256), input_var=real_fc3)) # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot)) # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layer_fc3 = gen0_layers[-1]

    gen0_layers.append(LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1)) # concatenate noise and fc3 features
    gen0_layers.append(LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[-1], num_units=256*5*5, W=Normal(0.02), nonlinearity=T.nnet.relu)),
                       (self.args.batch_size, 256, 5, 5))) # fc
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 256, 10, 10), (5, 5), stride=(2, 2), padding='half',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 14, 14), (5, 5), stride=(1, 1), padding='valid',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 28, 28), (5, 5), stride=(2, 2), padding='half',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 3, 32, 32), (5, 5), stride=(1, 1), padding='valid',
                       W=Normal(0.02), nonlinearity=T.nnet.sigmoid)) # deconv

    gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
    gen_x = gen_x_pre - meanx
    # gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3}, deterministic=False) - meanx

    return gen0_layers, gen_x
    """
    gen_x_layer_z = LL.InputLayer(shape=(self.args.batch_size, self.args.z0dim), input_var=z0)  # z, 20
    # gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None)  # 20 -> 64

    gen_x_layer_y = LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot)  # conditioned on real fc3 activations
    gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y, gen_x_layer_z], axis=1)  # 512+256 = 768
    gen_x_layer_pool2 = LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256*5*5)), (self.args.batch_size, 256, 5, 5))
    gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_pool2, (self.args.batch_size, 256, 10, 10), (5, 5), stride=(2, 2), padding='half',
                                         W=Normal(0.02), nonlinearity=nn.relu))
    gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_1, (self.args.batch_size, 128, 14, 14), (5, 5), stride=(1, 1), padding='valid',
                                         W=Normal(0.02), nonlinearity=nn.relu))
    gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_2, (self.args.batch_size, 128, 28, 28), (5, 5), stride=(2, 2), padding='half',
                                         W=Normal(0.02), nonlinearity=nn.relu))
    gen_x_layer_x = nn.Deconv2DLayer(gen_x_layer_dconv1_1, (self.args.batch_size, 3, 32, 32), (5, 5), stride=(1, 1), padding='valid',
                                     W=Normal(0.02), nonlinearity=T.nnet.sigmoid)
    # gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1,1), pad=0, stride=1,
    #                                    W=Normal(0.02), nonlinearity=T.nnet.sigmoid)

    gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2, gen_x_layer_dconv2_1,
                    gen_x_layer_dconv2_2, gen_x_layer_dconv1_1, gen_x_layer_x]

    gen_x_pre = LL.get_output(gen_x_layer_x, deterministic=False)
    gen_x = gen_x_pre - meanx

    return gen_x_layers, gen_x
def load_data():
    xs = []
    ys = []
    for j in range(5):
        # str() replaces the Python-2-only backtick repr syntax
        d = unpickle('data/cifar-10-python/cifar-10-batches-py/data_batch_' + str(j + 1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)

    d = unpickle('data/cifar-10-python/cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])

    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    # subtract per-pixel mean
    pixel_mean = np.mean(x[0:50000], axis=0)
    # pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb"))
    x -= pixel_mean

    # create mirrored images
    X_train = x[0:50000, :, :, :]
    Y_train = y[0:50000]
    # X_train_flip = X_train[:, :, :, ::-1]
    # Y_train_flip = Y_train
    # X_train = np.concatenate((X_train, X_train_flip), axis=0)
    # Y_train = np.concatenate((Y_train, Y_train_flip), axis=0)

    X_test = x[50000:, :, :, :]
    Y_test = y[50000:]

    return pixel_mean, dict(
        X_train=lasagne.utils.floatX(X_train),
        Y_train=Y_train.astype('int32'),
        X_test=lasagne.utils.floatX(X_test),
        Y_test=Y_test.astype('int32'))
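A brief usage sketch (an assumption, not in the original source; it requires the CIFAR-10 python batches at the hard-coded path above plus the module-level `unpickle`, `np`, and `lasagne` imports):

pixel_mean, data = load_data()
print(data['X_train'].shape, data['X_train'].dtype)   # (50000, 3, 32, 32) float32
print(data['Y_test'].shape)                           # (10000,)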
## specify generator, gen_pool5 = G(z, y_1hot)
#z = theano_rng.uniform(size=(args.batch_size, 100)) # uniform noise
#y_1hot = T.matrix()
#gen_pool5_layer_z = LL.InputLayer(shape=(args.batch_size, 100), input_var=z) # z, 100
#gen_pool5_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_z, num_units=256, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 100 -> 256
#gen_pool5_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot) # y, 10
#gen_pool5_layer_y_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_y, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 10 -> 512
#gen_pool5_layer_fc4 = LL.ConcatLayer([gen_pool5_layer_z_embed,gen_pool5_layer_y_embed],axis=1) #512+256 = 768
##gen_pool5_layer_fc4 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc5, num_units=512, nonlinearity=T.nnet.relu))#, g=None)
#gen_pool5_layer_fc3 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc4, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None)
#gen_pool5_layer_pool5_flat = LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, nonlinearity=T.nnet.relu) # NO batch normalization at output layer
##gen_pool5_layer_pool5_flat = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # no batch-norm at output layer
#gen_pool5_layer_pool5 = LL.ReshapeLayer(gen_pool5_layer_pool5_flat, (args.batch_size,32,4,4))
#gen_pool5_layers = [gen_pool5_layer_z, gen_pool5_layer_z_embed, gen_pool5_layer_y, gen_pool5_layer_y_embed, #gen_pool5_layer_fc5,
# gen_pool5_layer_fc4, gen_pool5_layer_fc3, gen_pool5_layer_pool5_flat, gen_pool5_layer_pool5]
#gen_pool5 = LL.get_output(gen_pool5_layer_pool5, deterministic=False)
def create_model(incoming, options):
    input_p = 0.2
    hidden_p = 0.5
    conv_num_filters1 = int(100 / (1.0 - input_p))
    conv_num_filters2 = int(150 / (1.0 - hidden_p))
    conv_num_filters3 = int(200 / (1.0 - hidden_p))
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = int(options['BOTTLENECK'] / 0.5)
    dense_mid_size = int(options['DENSE'] / 0.5)
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()

    dropout0 = DropoutLayer(incoming, p=0.2, name='dropout0')
    conv2d1 = Conv2DLayer(dropout0, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d2 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d2')
    dropout1 = DropoutLayer(maxpool2d2, name='dropout1')
    conv2d3 = Conv2DLayer(dropout1, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    maxpool2d4 = MaxPool2DLayer(conv2d3, pool_size=pool_size, name='maxpool2d4', pad=(1, 0))
    dropout2 = DropoutLayer(maxpool2d4, name='dropout2')
    conv2d5 = Conv2DLayer(dropout2, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    reshape6 = ReshapeLayer(conv2d5, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dropout3 = DropoutLayer(reshape6, name='dropout3')
    dense7 = DenseLayer(dropout3, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    dropout4 = DropoutLayer(dense7, name='dropout4')
    bottleneck = DenseLayer(dropout4, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # conv_num_filters3 x 3 x 5
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride,
                               W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride,
                               W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride,
                               crop=(1, 0), W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
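A minimal training-loss sketch for the convolutional autoencoder above; this is an assumption rather than the project's actual training code, and the input shape and option values are hypothetical.

import theano
import theano.tensor as T
import lasagne
import lasagne.layers as ll

input_var = T.tensor4('inputs')
l_in = ll.InputLayer((None, 1, 26, 34), input_var=input_var)   # hypothetical input shape
network, bottleneck = create_model(l_in, {'BOTTLENECK': 50, 'DENSE': 500})

reconstruction = ll.get_output(network)                # flattened reconstruction, shape (batch, -1)
target = input_var.flatten(2)                          # flatten the input the same way
loss = lasagne.objectives.squared_error(reconstruction, target).mean()
params = ll.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var], loss, updates=updates)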
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform, use_peepholes=False, use_blstm=True):
    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                           use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification per timestep,
    # so we'll use the number of classes as the output size.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
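A hedged sketch of how this sequence classifier might be trained; it assumes `l_out` is the layer returned above, `mask_var` is the same mask tensor passed into create_model, and `targets` holds per-timestep integer labels (none of these appear in the original snippet).

import theano.tensor as T
import lasagne
import lasagne.layers as las_layers

targets = T.imatrix('targets')                            # (batch, seqlen) integer labels
predictions = las_layers.get_output(l_out)                # (batch, seqlen, output_classes)
pred_flat = predictions.reshape((-1, predictions.shape[-1]))
ce = lasagne.objectives.categorical_crossentropy(pred_flat, targets.flatten())
loss = (ce * mask_var.flatten()).sum() / mask_var.sum()   # average over valid timesteps only
params = las_layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adadelta(loss, params)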
def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'), output_classes=26,
                                          w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          [2000, 1000, 500, 50],
                                          [nonlinearities, nonlinearities, nonlinearities, linear],
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'bstm1',
                                       use_peepholes)

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a single classification for the sequence,
    # so we'll use the number of classes as the output size.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(incoming, options):
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()

    dropout0 = DropoutLayer(incoming, p=0.2, name='dropout0')
    conv2d1 = Conv2DLayer(dropout0, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    bn1 = BatchNormLayer(conv2d1, name='batchnorm1')
    maxpool2d2 = MaxPool2DLayer(bn1, pool_size=pool_size, name='maxpool2d2')
    dropout1 = DropoutLayer(maxpool2d2, name='dropout1')
    conv2d3 = Conv2DLayer(dropout1, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    bn2 = BatchNormLayer(conv2d3, name='batchnorm2')
    maxpool2d4 = MaxPool2DLayer(bn2, pool_size=pool_size, name='maxpool2d4', pad=(1, 0))
    dropout2 = DropoutLayer(maxpool2d4, name='dropout2')
    conv2d5 = Conv2DLayer(dropout2, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    bn3 = BatchNormLayer(conv2d5, name='batchnorm3')
    reshape6 = ReshapeLayer(bn3, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dropout3 = DropoutLayer(reshape6, name='dropout3')
    dense7 = DenseLayer(dropout3, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    bn4 = BatchNormLayer(dense7, name='batchnorm4')
    dropout4 = DropoutLayer(bn4, name='dropout4')
    bottleneck = DenseLayer(dropout4, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # conv_num_filters3 x 3 x 5
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride,
                               W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride,
                               W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride,
                               crop=(1, 0), W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=T.iscalar('theta'), nonlinearity=rectify,
                                w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_' + name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_' + name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_' + name, 'fc2_' + name, 'fc3_' + name, 'bottleneck_' + name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(l_encoder_raw,
                              (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                              name='reshape2_' + name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_' + name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_' + name)
    return l_lstm
def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005,
                learning_rate_rl=0.005, batch_size=32, ment=0.1):
    # 2-layer MLP
    self.in_size = in_size  # x and y coordinate
    self.out_size = out_size  # up, down, right, left
    self.batch_size = batch_size
    self.learning_rate = learning_rate_rl
    self.n_hid = n_hid

    input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
        T.itensor3('am'), T.fvector('r')
    in_var = T.reshape(input_var, (input_var.shape[0] * input_var.shape[1], self.in_size))

    l_mask_in = L.InputLayer(shape=(None, None), input_var=turn_mask)
    pol_in = T.fmatrix('pol-h')
    l_in = L.InputLayer(shape=(None, None, self.in_size), input_var=input_var)
    l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in)  # B x H x D
    pol_out = L.get_output(l_pol_rnn)[:, -1, :]
    l_den_in = L.ReshapeLayer(l_pol_rnn, (turn_mask.shape[0] * turn_mask.shape[1], n_hid))  # BH x D
    l_out = L.DenseLayer(l_den_in, self.out_size, nonlinearity=lasagne.nonlinearities.softmax)

    self.network = l_out
    self.params = L.get_all_params(self.network)

    # rl
    probs = L.get_output(self.network)  # BH x A
    out_probs = T.reshape(probs, (input_var.shape[0], input_var.shape[1], self.out_size))  # B x H x A
    log_probs = T.log(out_probs)
    act_probs = (log_probs * act_mask).sum(axis=2)  # B x H
    ep_probs = (act_probs * turn_mask).sum(axis=1)  # B
    H_probs = -T.sum(T.sum(out_probs * log_probs, axis=2), axis=1)  # B
    self.loss = 0. - T.mean(ep_probs * reward_var + ment * H_probs)

    updates = lasagne.updates.rmsprop(self.loss, self.params, learning_rate=learning_rate_rl,
                                      epsilon=1e-4)

    self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
    self.train_fn = theano.function(self.inps, self.loss, updates=updates)
    self.obj_fn = theano.function(self.inps, self.loss)
    self.act_fn = theano.function([input_var, turn_mask, pol_in], [out_probs, pol_out])

    # sl
    sl_loss = 0. - T.mean(ep_probs)
    sl_updates = lasagne.updates.rmsprop(sl_loss, self.params, learning_rate=learning_rate_sl,
                                         epsilon=1e-4)
    self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss,
                                       updates=sl_updates)
    self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)