def init_weights(fan_in, fan_out, init='he'):
    """Sample an initial weight matrix of shape (fan_in, fan_out).

    Args:
        fan_in: number of rows (input units) of the matrix.
        fan_out: number of columns (output units) of the matrix.
        init: one of 'lecun', 'he', 'orthogonal' or None.  None picks
            'orthogonal' for square matrices and 'lecun' otherwise.

    Returns:
        A numpy array of shape (fan_in, fan_out) cast to
        theano.config.floatX.

    Raises:
        ValueError: if `init` is not one of the supported schemes.
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        # A uniform on [-a, a] has standard deviation a / sqrt(3).
        return numpy.random.uniform(
            low=-stdev * numpy.sqrt(3),
            high=stdev * numpy.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    def orthogonal(shape):
        """Orthogonal matrix from the SVD of a Gaussian sample (from lasagne)."""
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], numpy.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = numpy.random.normal(0.0, 1.0, flat_shape)
        u, _, v = numpy.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype(theano.config.floatX)

    # Resolve the "automatic" choice up front so the dispatch below is flat.
    if init is None:
        init = 'orthogonal' if fan_in == fan_out else 'lecun'

    shape = (fan_in, fan_out)
    if init == 'lecun':
        return uniform(numpy.sqrt(1. / fan_in), shape)
    if init == 'he':
        return uniform(numpy.sqrt(2. / fan_in), shape)
    if init == 'orthogonal':
        return orthogonal(shape)
    # Previously an unknown scheme fell through and failed with an
    # unbound-local error on the return statement.
    raise ValueError(
        "Unknown init {!r}; expected 'lecun', 'he', 'orthogonal' or None".format(init)
    )
# Example source code using Python's floatX()
def Dense(name, input_dim, output_dim, inputs, bias=True, init=None, weightnorm=True, hidden_dim=None):
    """Fully connected layer with optional weight normalisation and bias.

    Args:
        name: prefix for the parameters ('.W', '.g', '.b') registered
            through lib.param.
        input_dim: size of the last axis of `inputs`.
        output_dim: size of the last axis of the result.
        inputs: symbolic 2-D input, or 3-D input that is flattened to 2-D
            for the matrix product and reshaped back afterwards.
        bias: add a zero-initialised bias vector when True.
        init: initialisation scheme forwarded to init_weights.
        weightnorm: when True, rescale each weight column to a learned
            norm '.g' (initialised to the column norms of the initial W).
        hidden_dim: size of the middle axis of the 3-D result.  When None
            it is taken from `inputs.shape[1]`.

    Returns:
        The symbolic layer output; 3-D if `inputs` was 3-D, else 2-D.
    """
    weight_values = init_weights(input_dim, output_dim, init)
    weight = lib.param(
        name + '.W',
        weight_values
    )

    batch_size = None
    if inputs.ndim == 3:
        batch_size = inputs.shape[0]
        if hidden_dim is None:
            # Without this fallback the final reshape received None as a
            # dimension and failed for callers relying on the default.
            hidden_dim = inputs.shape[1]
        inputs = inputs.reshape((-1, input_dim))

    if weightnorm:
        norm_values = numpy.linalg.norm(weight_values, axis=0)
        norms = lib.param(
            name + '.g',
            norm_values
        )
        # Per-column scale factor, broadcast over the rows of W.
        normed_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
        result = T.dot(inputs, normed_weight)
    else:
        result = T.dot(inputs, weight)

    if bias:
        b = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        result += b

    result.name = name + ".output"
    if batch_size is not None:
        return result.reshape((batch_size, hidden_dim, output_dim))
    return result
def Embedding(name, n_symbols, output_dim, indices):
    """Look up a learned vector for every entry of `indices`.

    Registers an (n_symbols, output_dim) table under `name`, initialised
    from a standard normal, and returns a tensor shaped like `indices`
    with one extra trailing axis of size `output_dim`.
    """
    initial_table = numpy.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Gather on the flattened indices, then restore the original layout.
    gathered = vectors[indices.flatten()]
    output_shape = tuple(indices.shape) + (output_dim,)
    return gathered.reshape(output_shape)
def softmax_and_sample(logits):
    """Draw one class index per position from softmax(logits).

    The logits are flattened to 2-D over their last axis, passed through
    a softmax, sampled with `srng.multinomial`, reshaped back to the
    original shape, and reduced with an argmax over the last axis.
    """
    last_axis = logits.ndim - 1
    original_shape = logits.shape
    rows = logits.reshape((-1, logits.shape[last_axis]))

    probabilities = T.nnet.softmax(rows)
    draws = srng.multinomial(pvals=probabilities)
    samples = T.cast(draws, theano.config.floatX).reshape(original_shape)
    return T.argmax(samples, axis=samples.ndim - 1)
def GRUStep(name, input_dim, hidden_dim, x_t, h_tm1):
    """Compute one GRU step and return the new hidden state.

    `x_t` is projected once to 3 * hidden_dim columns: the first
    2 * hidden_dim feed the update/reset gates and the remainder feeds
    the candidate state.  `h_tm1` is the previous hidden state.
    """
    gate_width = 2 * hidden_dim

    input_proj = lib.ops.Dense(
        name+'.Input',
        input_dim,
        3 * hidden_dim,
        x_t
    )

    recurrent_gates = lib.ops.Dense(
        name+'.Recurrent_Gates',
        hidden_dim,
        gate_width,
        h_tm1,
        bias=False
    )
    gates = T.nnet.sigmoid(recurrent_gates + input_proj[:, :gate_width])
    update = gates[:, :hidden_dim]
    reset = gates[:, hidden_dim:]

    # The reset gate scales the previous state before the candidate projection.
    recurrent_candidate = lib.ops.Dense(
        name+'.Recurrent_Candidate',
        hidden_dim,
        hidden_dim,
        reset * h_tm1,
        bias=False,
        init='orthogonal'
    )
    candidate = T.tanh(recurrent_candidate + input_proj[:, gate_width:])

    keep = lib.floatX(1.0) - update
    return (update * candidate) + (keep * h_tm1)
def relu(x):
    """Elementwise max(x, 0) built from T.switch."""
    # T.nnet.relu is avoided on purpose: the original author saw NaNs with it.
    threshold = lib.floatX(0)
    floor = lib.floatX(0)
    is_positive = x > threshold
    return T.switch(is_positive, x, floor)
def Conv1D(name, input_dim, output_dim, filter_size, inputs, apply_biases=True):
    """
    1-D convolution along the height axis, implemented with conv2d.

    inputs.shape: (batch size, height, input_dim)
    output.shape: (batch size, output height, output_dim)
    * performs valid convs, so the output height is the one conv2d yields
      with border_mode='valid'
    """
    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, 1)

    # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)], i.e. with the given stdev.
    stdev = 1./numpy.sqrt(input_dim * filter_size)
    half_width = stdev * numpy.sqrt(3)
    initial_filters = numpy.random.uniform(
        low=-half_width,
        high=half_width,
        size=filter_shape
    ).astype(theano.config.floatX)
    filters = lib.param(name+'.Filters', initial_filters)

    # conv2d takes inputs as (batch size, input channels, height[?], width[?])
    as_4d = inputs.reshape((inputs.shape[0], inputs.shape[1], 1, inputs.shape[2]))
    channels_first = as_4d.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(channels_first, filters, border_mode='valid', filter_flip=False)

    if apply_biases:
        biases = lib.param(
            name+'.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + biases[None, :, None, None]

    # Back to channels-last, then drop the singleton width axis.
    channels_last = result.dimshuffle(0, 2, 3, 1)
    return channels_last.reshape(
        (channels_last.shape[0], channels_last.shape[1], channels_last.shape[3])
    )
def Skew(inputs):
    """
    Shift row i of the input right by i columns inside a wider zero buffer.

    input.shape: (batch size, HEIGHT, WIDTH, dim)
    output.shape: (batch size, HEIGHT, 2*WIDTH - 1, dim)
    """
    skewed_shape = (
        inputs.shape[0],
        inputs.shape[1],
        2*inputs.shape[2] - 1,
        inputs.shape[3],
    )
    skewed = T.zeros(skewed_shape, theano.config.floatX)

    for row in xrange(HEIGHT):
        target = skewed[:, row, row:row+WIDTH, :]
        skewed = T.inc_subtensor(target, inputs[:, row, :, :])
    return skewed
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Top-tier RNN operating on big frames of BIG_FRAME_SIZE samples.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)

    When `reset` is true the learned initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    # Clear any broadcastable flags so both ifelse branches share one type.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Floor division keeps the shape integral regardless of whether `/`
    # means integer or true division in the running interpreter.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // BIG_FRAME_SIZE,
        BIG_FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU'+str(i), BIG_DIM, BIG_DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # Each big frame is expanded into BIG_FRAME_SIZE // FRAME_SIZE
    # conditioning vectors of size DIM for the next tier.
    frames_per_big_frame = BIG_FRAME_SIZE // FRAME_SIZE
    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * frames_per_big_frame,
        grus[-1]
    )
    output = output.reshape((output.shape[0], output.shape[1] * frames_per_big_frame, DIM))

    # Final time step of every layer, stacked on a new layer axis.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape(
        (independent_preds.shape[0], independent_preds.shape[1] * BIG_FRAME_SIZE, Q_LEVELS)
    )

    return (output, last_hidden, independent_preds)
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    Frame-tier RNN conditioned on the output of a higher tier.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Clear any broadcastable flags so both ifelse branches share one type.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Floor division keeps the shape integral regardless of whether `/`
    # means integer or true division in the running interpreter.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    # Project each frame to DIM and add the higher-tier conditioning.
    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM, frames) + other_input

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    # One DIM-sized vector per sample position.
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def sample_level_rnn(input_sequences, h0, reset):
    """
    Sample-tier RNN over embedded inputs.

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, seq len, Q_LEVELS)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # T.alloc marks constant size-1 axes as broadcastable; clear the flags so
    # both ifelse branches have the same type (as the sibling RNN tiers do).
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Embedded inputs: every sample index becomes a Q_LEVELS-sized vector.
    # (A local name is used so the module-level FRAME_SIZE is not shadowed.)
    gru_input_dim = Q_LEVELS
    frames = lib.ops.Embedding('SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    gru0 = lib.ops.LowMemGRU('SampleLevel.GRU0', gru_input_dim, DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('SampleLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # We apply the softmax later.  The projection reads the outputs of all
    # GRU layers concatenated on the feature axis, not just the last one.
    output = lib.ops.Linear(
        'Output',
        N_GRUS*DIM,
        Q_LEVELS,
        T.concatenate(grus, axis=2)
    )

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-tier RNN operating on frames of FRAME_SIZE samples.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Clear any broadcastable flags so both ifelse branches share one type.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Floor division keeps the shape integral regardless of whether `/`
    # means integer or true division in the running interpreter.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', FRAME_SIZE, DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    # One DIM-sized vector per sample position.
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-tier RNN operating on frames of FRAME_SIZE samples.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Clear any broadcastable flags so both ifelse branches share one type.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Floor division keeps the shape integral regardless of whether `/`
    # means integer or true division in the running interpreter.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', FRAME_SIZE, DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    # One DIM-sized vector per sample position.
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def sample_level_rnn(input_sequences, h0, reset):
    """
    Sample-tier RNN over real-valued inputs ('frames' of size 1).

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, seq len, 2)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # T.alloc marks constant size-1 axes as broadcastable; clear the flags so
    # both ifelse branches have the same type (as the sibling RNN tiers do).
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Real-valued inputs: each sample is its own frame of size 1.  The values
    # are passed through unscaled.
    # (A local name is used so the module-level FRAME_SIZE is not shadowed.)
    gru_input_dim = 1
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1],
        1
    ))

    gru0 = lib.ops.LowMemGRU('SampleLevel.GRU0', gru_input_dim, DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('SampleLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # The projection reads the outputs of all GRU layers concatenated on the
    # feature axis and emits 2 values per position.
    # NOTE(review): presumably two distribution parameters rather than
    # Q_LEVELS logits -- confirm against the loss that consumes this.
    output = lib.ops.Linear(
        'Output',
        N_GRUS*DIM,
        2,
        T.concatenate(grus, axis=2)
    )

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-tier RNN operating on frames of FRAME_SIZE samples.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU layer along axis 1.  When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Clear any broadcastable flags so both ifelse branches share one type.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Floor division keeps the shape integral regardless of whether `/`
    # means integer or true division in the running interpreter.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', FRAME_SIZE, DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    # One DIM-sized vector per sample position.
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def conv1d(
    name,
    input,
    input_dim,
    output_dim,
    filter_size,
    init = 'glorot',
    non_linearity = 'relu',
    bias = True
    ):
    """Valid 1-D convolution over axis 1 of `input`, implemented with conv2d.

    Args:
        name: prefix for the '.W' (and '.b') parameters registered through
            lib.param.
        input: symbolic tensor laid out as (batch, length, input_dim).
        input_dim: number of input channels.
        output_dim: number of output channels ('gated' allocates twice as
            many filters).
        filter_size: filter length along the convolved axis.
        init: 'glorot' or 'he' (lasagne uniform initialisers).
        non_linearity: 'gated', 'relu', 'elu' or 'identity'.
        bias: add a zero-initialised per-filter bias when True.

    Returns:
        The convolution output laid out as (batch, new length, filters).

    Raises:
        ValueError: if `init` is not a supported initialiser.
        NotImplementedError: if `non_linearity` is not a supported name.
    """
    # Validate first so a bad argument fails before any parameter is
    # registered.  An unknown `init` used to surface as an unbound-local
    # error further down.
    if init not in ('glorot', 'he'):
        raise ValueError(
            "Unknown init {!r}; expected 'glorot' or 'he'".format(init)
        )
    if non_linearity not in ('gated', 'relu', 'elu', 'identity'):
        raise NotImplementedError("{} non-linearity not implemented!".format(non_linearity))

    import lasagne

    if init == 'glorot':
        initializer = lasagne.init.GlorotUniform()
    else:
        initializer = lasagne.init.HeUniform()

    # (batch, length, channels) -> (batch, channels, length, 1) for conv2d.
    inp = input.dimshuffle(0,2,1,'x')

    num_filters = 2*output_dim if non_linearity == 'gated' else output_dim

    W = lib.param(name+".W", initializer.sample((num_filters, input_dim, filter_size, 1)))

    conv_out = T.nnet.conv2d(
        inp, W,
        filter_flip= False,
        border_mode = 'valid'
    )

    if bias:
        b = lib.param(name+".b", lasagne.init.Constant(0.).sample((num_filters,)))
        conv_out = conv_out + b[None,:,None, None]

    # NOTE(review): the selected non-linearity has never been applied to the
    # output here -- the original chose an activation function and then
    # discarded it.  That behaviour is kept so existing callers see the same
    # values (for 'gated' this means 2*output_dim raw channels); confirm
    # whether the activation was meant to be applied.
    output = conv_out.reshape((conv_out.shape[0], conv_out.shape[1], conv_out.shape[2]))
    return output.dimshuffle(0,2,1)
def DilatedConv1D(name, input_dim, output_dim, filter_size, inputs, dilation, mask_type=None, apply_biases=True):
    """
    Dilated 1-D convolution along the length axis, implemented with conv2d
    using border_mode='half'.

    inputs.shape: (batch size, length, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, length, output_dim)

    Any non-None mask_type zeroes the filter taps after the centre one;
    'a' and 'b' are currently treated the same.
    """
    use_mask = mask_type is not None
    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, 1)

    # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)], i.e. with the given stdev.
    stdev = 1./numpy.sqrt(input_dim * filter_size)
    half_width = stdev * numpy.sqrt(3)
    filters_init = numpy.random.uniform(
        low=-half_width,
        high=half_width,
        size=filter_shape
    ).astype(theano.config.floatX)

    if use_mask:
        # Masked filters start from values scaled up by sqrt(2).
        filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name+'.Filters', filters_init)

    if use_mask:
        mask = numpy.ones(filter_shape, dtype=theano.config.floatX)
        center = filter_size//2
        # Zero every tap strictly after the centre position.
        mask[:, :, center + 1:, :] = 0.
        filters = filters * mask

    # conv2d takes inputs as (batch size, input channels, height[?], width[?])
    as_4d = inputs.reshape((inputs.shape[0], inputs.shape[1], 1, inputs.shape[2]))
    channels_first = as_4d.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(
        channels_first,
        filters,
        border_mode='half',
        filter_flip=False,
        filter_dilation=(dilation, 1)
    )

    if apply_biases:
        biases = lib.param(
            name+'.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + biases[None, :, None, None]

    # Back to channels-last, then drop the singleton width axis.
    channels_last = result.dimshuffle(0, 2, 3, 1)
    return channels_last.reshape(
        (channels_last.shape[0], channels_last.shape[1], channels_last.shape[3])
    )
def generate_and_save_samples(tag):
    """Save the current parameters, then generate 100 images and save a grid.

    Pixels are generated one at a time (row, column, channel): each call to
    `sample_fn` sees the binarized pixels produced so far.  The image that is
    written is the output of the last `sample_fn` call.
    """
    def save_images(images, filename, i = None):
        """images.shape: (batch, n channels, height, width)"""
        new_tag = tag if i is None else "{}_{}".format(tag, i)
        # Tile 100 images of 28x28 into a 10x10 grid.
        grid = images.reshape((10,10,28,28))
        grid = grid.transpose(1,2,0,3).reshape((10*28, 10*28))
        picture = scipy.misc.toimage(grid, cmin=0.0, cmax=1.0)
        picture.save('{}/{}_{}.jpg'.format(OUT_DIR, filename, new_tag))

    lib.save_params(os.path.join(OUT_DIR, tag + "_params.pkl"))

    latents = np.random.normal(size=(100, LATENT_DIM)).astype(theano.config.floatX)

    # `samples` collects the per-pixel outputs; `next_sample` holds the
    # binarized pixels that are fed back into the sampler.
    samples = np.zeros(
        (100, N_CHANNELS, HEIGHT, WIDTH),
        dtype=theano.config.floatX
    )
    next_sample = samples.copy()

    started = time.time()
    for row in xrange(HEIGHT):
        for col in xrange(WIDTH):
            for channel in xrange(N_CHANNELS):
                samples_p_value = sample_fn(latents, next_sample)
                next_sample[:, channel, row, col] = binarize(samples_p_value)[:, channel, row, col]
                samples[:, channel, row, col] = samples_p_value[:, channel, row, col]
    finished = time.time()

    print("Time taken for generation {:.4f}".format(finished - started))

    save_images(samples_p_value, 'samples')
def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """Run a GRU over a batch of sequences with theano.scan.

    Args:
        name: prefix for the '.Gates.*' and '.Candidate.*' parameters
            registered through lib.param.
        input_dim: size of the last axis of `inputs`.
        hidden_dim: size of the hidden state.
        inputs: symbolic tensor laid out as (batch_size, N_FRAMES, FRAME_SIZE).
        h0: initial hidden state, (batch_size, hidden_dim).  When None a
            zero state is used.

    Returns:
        The hidden states for every step, laid out batch-first:
        (batch_size, N_FRAMES, hidden_dim).
    """
    # scan iterates over the leading axis, so put time first.
    inputs = inputs.transpose(1,0,2)

    if h0 is None:
        # With outputs_info=[None] scan would not feed a previous state back
        # into `step`, so the two-argument step function could not be called.
        # Default to an all-zero initial state instead.
        h0 = theano.tensor.zeros(
            (inputs.shape[1], hidden_dim),
            dtype=theano.config.floatX
        )

    # Gate weights act on the concatenated [input, hidden] vector.
    weight_values = lasagne.init.GlorotUniform().sample((input_dim+hidden_dim,2*hidden_dim))
    W1 = lib.param(
        name+'.Gates.W',
        weight_values
    )
    # Gate biases start at one.
    b1 = lib.param(
        name+'.Gates.b',
        np.ones(2*hidden_dim).astype(theano.config.floatX)
    )

    weight_values = lasagne.init.GlorotUniform().sample((input_dim+hidden_dim,hidden_dim))
    W2 = lib.param(
        name+'.Candidate.W',
        weight_values
    )
    b2 = lib.param(
        name+'.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX)
    )

    def step(x_t, h_tm1):
        return recurrent_fn(
            x_t,
            h_tm1,
            name,
            input_dim,
            hidden_dim,
            W1,b1,W2,b2
        )

    outputs, _ = theano.scan(
        step,
        sequences=[inputs],
        outputs_info=[h0],
    )

    # Back to batch-first layout.
    out = outputs.dimshuffle(1,0,2)
    out.name = name+'.output'
    return out
def DiagonalLSTM(name, input_dim, inputs):
    """
    LSTM scanned over the columns of the skewed input.

    inputs.shape: (batch size, height, width, input_dim)
    outputs.shape: (batch size, height, width, DIM)
    """
    skewed = Skew(inputs)
    batch_size = skewed.shape[0]

    # Input contribution to all four gates, computed for every position at once.
    input_to_state = Conv2D(name+'.InputToState', input_dim, 4*DIM, 1, skewed, mask_type='b')

    # Learned initial cell and hidden states, replicated across the batch.
    c0_unbatched = lib.param(
        name + '.c0',
        numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX)
    )
    c0 = T.alloc(c0_unbatched, batch_size, HEIGHT, DIM)

    h0_unbatched = lib.param(
        name + '.h0',
        numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX)
    )
    h0 = T.alloc(h0_unbatched, batch_size, HEIGHT, DIM)

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, height, DIM)

        # TODO consider learning this padding
        # One zero row is prepended so the size-2 valid convolution keeps
        # the height unchanged.
        zero_row = T.zeros((batch_size, 1, DIM), theano.config.floatX)
        padded_h = T.concatenate([zero_row, prev_h], axis=1)
        state_to_state = Conv1D(name+'.StateToState', DIM, 4*DIM, 2, padded_h, apply_biases=False)

        gates = current_input_to_state + state_to_state

        # First three DIM-wide slices are the sigmoid gates (output, forget,
        # input); the fourth is the tanh candidate.
        o_f_i = T.nnet.sigmoid(gates[:,:,:3*DIM])
        o = o_f_i[:,:,0*DIM:1*DIM]
        f = o_f_i[:,:,1*DIM:2*DIM]
        i = o_f_i[:,:,2*DIM:3*DIM]
        g = T.tanh(gates[:,:,3*DIM:4*DIM])

        new_c = (f * prev_c) + (i * g)
        new_h = o * T.tanh(new_c)

        return (new_c, new_h)

    # Put the skewed-width axis first so scan steps through it.
    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(2,0,1,3),
        outputs_info=[c0, h0]
    )
    hidden_steps = outputs[1]

    # Restore (batch, height, skewed width, DIM) before undoing the skew.
    all_hs = hidden_steps.dimshuffle(1,2,0,3)
    return Unskew(all_hs)