def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_short_seq(self):
    input_shape = 14
    sequence_length = 2
    batch_size = 1
    _, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4)
    states = T.tensor3('states')
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [lstm_out, slice_out])
    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 1::2, :].tolist(), sample_slice_out.tolist())
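# Hedged sketch, not part of the original test file: with the assumed defaults
# start=1 and downsample=2, the equality asserted above is what a plain
# lasagne SliceLayer over the time axis (axis=1) would produce.
import numpy as np
import theano
import theano.tensor as T
import lasagne

l_in = lasagne.layers.InputLayer((1, 2, 14))
l_sl = lasagne.layers.SliceLayer(l_in, indices=slice(1, None, 2), axis=1)
x = T.tensor3('x')
f = theano.function([x], lasagne.layers.get_output(l_sl, x))
assert f(np.zeros((1, 2, 14), dtype=theano.config.floatX)).shape == (1, 1, 14)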
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_len_1_seq(self):
    input_shape = 14
    sequence_length = 1
    batch_size = 1
    l_out, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4,
        start=0,
        downsample=3)
    states = T.tensor3('states')
    l_out_out = lasagne.layers.get_output(l_out, states)
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [l_out_out, lstm_out, slice_out])
    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_out, sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 0::3, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_longer_len_seq(self):
    input_shape = 14
    sequence_length = 7
    batch_size = 1
    l_out, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4,
        start=0,
        downsample=3)
    states = T.tensor3('states')
    l_out_out = lasagne.layers.get_output(l_out, states)
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [l_out_out, lstm_out, slice_out])
    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_out, sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 0::3, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_shared_var(self):
    input_shape = 14
    sequence_length = 1
    batch_size = 1
    _, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4)
    states = T.tensor3('states')
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    states_shared = theano.shared(np.zeros((batch_size, sequence_length, input_shape)))
    run = theano.function([], [lstm_out, slice_out], givens={states: states_shared})
    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    states_shared.set_value(sample_states)
    sample_lstm_out, sample_slice_out = run()
    self.assertEquals(sample_lstm_out[:, 1::2, :].tolist(), sample_slice_out.tolist())
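# Minimal sketch of the `givens` pattern used in the test above (illustrative,
# dtypes assumed): the symbolic input is substituted by a shared variable of the
# same type, so the compiled function takes no arguments and reads whatever the
# shared variable currently holds.
import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')
x_shared = theano.shared(np.zeros((1, 1, 14), dtype=x.dtype))
g = theano.function([], x.sum(), givens={x: x_shared})
x_shared.set_value(np.ones((1, 1, 14), dtype=x.dtype))
print(g())  # 14.0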
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()
    self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
    self.W = self.init(self.W_shape)
    self.b = shared_zeros((self.nb_filter,))
    self.params = [self.W, self.b]

    self.regularizers = []
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)
    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
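# Hedged sketch, not the original layer's get_output: with W_shape =
# (nb_filter, input_dim, filter_length, 1), a 1-D convolution over a
# (batch, steps, input_dim) tensor3 can be expressed as a 2-D convolution
# after moving channels to the second axis and adding a dummy width axis.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')                                  # (batch, steps, input_dim)
W = theano.shared(np.random.randn(8, 14, 3, 1).astype(theano.config.floatX))
X4 = X.dimshuffle(0, 2, 1, 'x')                     # (batch, input_dim, steps, 1)
out = T.nnet.conv2d(X4, W, border_mode='valid')     # (batch, 8, steps - 2, 1)
f = theano.function([X], out)
print(f(np.zeros((1, 10, 14), dtype=theano.config.floatX)).shape)  # (1, 8, 8, 1)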
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_z = self.init((input_dim, self.output_dim))
    self.U_z = self.inner_init((self.output_dim, self.output_dim))
    self.b_z = shared_zeros((self.output_dim))

    self.W_r = self.init((input_dim, self.output_dim))
    self.U_r = self.inner_init((self.output_dim, self.output_dim))
    self.b_r = shared_zeros((self.output_dim))

    self.W_h = self.init((input_dim, self.output_dim))
    self.U_h = self.inner_init((self.output_dim, self.output_dim))
    self.b_h = shared_zeros((self.output_dim))

    self.params = [
        self.W_z, self.U_z, self.b_z,
        self.W_r, self.U_r, self.b_r,
        self.W_h, self.U_h, self.b_h,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
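# Hedged sketch of the recurrence these parameters are typically used for; the
# actual step lives elsewhere in the layer, and this follows one common GRU
# convention rather than reproducing the original implementation. Shapes: x_t is
# (batch, input_dim), h_tm1 is (batch, output_dim), W_* are (input_dim, output_dim),
# U_* are (output_dim, output_dim).
import theano.tensor as T

def gru_step(x_t, h_tm1, W_z, U_z, b_z, W_r, U_r, b_r, W_h, U_h, b_h):
    z = T.nnet.sigmoid(T.dot(x_t, W_z) + T.dot(h_tm1, U_z) + b_z)   # update gate
    r = T.nnet.sigmoid(T.dot(x_t, W_r) + T.dot(h_tm1, U_r) + b_r)   # reset gate
    hh = T.tanh(T.dot(x_t, W_h) + T.dot(r * h_tm1, U_h) + b_h)      # candidate state
    return z * h_tm1 + (1. - z) * hh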
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()
    self.W = self.init((input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W, self.b]

    self.regularizers = []
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)
    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
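# Hedged sketch (assumed, not the original get_output): with W of shape
# (input_dim, output_dim), T.dot applied to a (batch, time, input_dim) tensor3
# contracts the last axis, so the same affine map is applied at every timestep.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')
W = theano.shared(np.ones((14, 4), dtype=theano.config.floatX))
b = theano.shared(np.zeros(4, dtype=theano.config.floatX))
f = theano.function([X], T.dot(X, W) + b)
print(f(np.ones((2, 5, 14), dtype=theano.config.floatX)).shape)  # (2, 5, 4)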
def build_encoder_bi(tparams, options):
    """
    build bidirectional encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    embeddingr = embedding[::-1]
    x_mask = tensor.matrix('x_mask', dtype='float32')
    xr_mask = x_mask[::-1]

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
                                             prefix='encoder_r',
                                             mask=xr_mask)

    ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)

    return embedding, x_mask, ctx
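# Illustrative note, not from the original file: the encoder above is time-major,
# so `embedding[::-1]` reverses the sequence along the first axis and the backward
# RNN reads the words in reverse order; the two final hidden states are then
# concatenated along the feature axis to form the context.
import numpy as np

emb = np.arange(6).reshape(3, 1, 2)      # (n_timesteps, n_samples, dim_word)
print(emb[::-1][0, 0])                   # last timestep first: [4 5]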
def __init__(self, input_dim, output_dim,
             init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None,
             truncate_gradient=-1, return_sequences=False):
    super(SimpleRNN, self).__init__()
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.return_sequences = return_sequences

    self.input = T.tensor3()
    self.W = self.init((self.input_dim, self.output_dim))
    self.U = self.inner_init((self.output_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W, self.U, self.b]

    if weights is not None:
        self.set_weights(weights)
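# Hedged sketch of the recurrence these parameters feed (illustrative; the
# original layer defines its own step inside get_output):
#     h_t = activation(x_t . W + h_{t-1} . U + b)
# scanned over the time axis of a (time, batch, input_dim) sequence.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')                                   # (time, batch, input_dim)
W = theano.shared(np.random.randn(14, 8).astype(theano.config.floatX))
U = theano.shared(np.random.randn(8, 8).astype(theano.config.floatX))
b = theano.shared(np.zeros(8, dtype=theano.config.floatX))

def step(x_t, h_tm1):
    return T.nnet.sigmoid(T.dot(x_t, W) + T.dot(h_tm1, U) + b)

h0 = T.zeros((X.shape[1], 8))
h, _ = theano.scan(step, sequences=X, outputs_info=h0)
f = theano.function([X], h[-1])
print(f(np.zeros((5, 2, 14), dtype=theano.config.floatX)).shape)  # (2, 8)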
def __init__(self, input_dim, output_dim, depth=3,
             init='glorot_uniform', inner_init='orthogonal',
             activation='sigmoid', inner_activation='hard_sigmoid',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(SimpleDeepRNN, self).__init__()
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.depth = depth
    self.return_sequences = return_sequences

    self.input = T.tensor3()
    self.W = self.init((self.input_dim, self.output_dim))
    self.Us = [self.inner_init((self.output_dim, self.output_dim)) for _ in range(self.depth)]
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W] + self.Us + [self.b]

    if weights is not None:
        self.set_weights(weights)
def build_encoder(tparams, options):
    """
    build an encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return embedding, x_mask, ctx
def ndim_tensor(ndim):
    if ndim == 1:
        return T.vector()
    elif ndim == 2:
        return T.matrix()
    elif ndim == 3:
        return T.tensor3()
    elif ndim == 4:
        return T.tensor4()
    return T.matrix()
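# Illustrative usage of the helper above: pick a symbolic variable whose rank
# matches the data you intend to feed, e.g. a tensor3 for (batch, time, features).
import theano.tensor as T

x3 = ndim_tensor(3)
assert x3.ndim == 3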
def init_func(self, img_value, scene_value):
    if self._proj_func is None:
        img = T.tensor3()
        self._proj_func = theano.function([img], self.proj_mlp.compute(img))
    if self._init_func is None:
        init_e = self._feat_shared.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        self._init_func = theano.function([], init_state)
    self._feat_shared.set_value(self._proj_func(img_value))
    self._scene_shared.set_value(scene_value)
    return self._init_func()
def init_func(self, img_value):
    if self._proj_func is None:
        img = T.tensor3()
        self._proj_func = theano.function([img], self.proj_mlp.compute(img))
    if self._init_func is None:
        init_e = self._feat_shared.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        self._init_func = theano.function([], init_state)
    self._feat_shared.set_value(self._proj_func(img_value))
    return self._init_func()
def build_model(tparams, leavesList, ancestorsList, options):
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:, :, None]).sum(axis=1)
        embList.append(tempEmb)
    emb = T.concatenate(embList, axis=0)

    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:, :, None]

    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    # Guard the L2 term: without the else branch, `cost` would be unbound when options['L2'] == 0.
    if options['L2'] > 0.:
        cost = cost_noreg + options['L2'] * ((tparams['W_output'] ** 2).sum() + (tparams['W_attention'] ** 2).sum() + (tparams['v_attention'] ** 2).sum())
    else:
        cost = cost_noreg

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat
def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)

    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))
    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al. (2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')

    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_gru = GRULayer_ANA(layer_input, num_units, resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                             reset_input=reset_input, only_return_final=True, name='GRU')

    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)
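# Hedged sketch of how a binary-classification head like the ones above might be
# trained; the real `train(...)` helper is not shown in this collection, and the
# names and choices below (adam, binary cross-entropy) are assumptions for
# illustration only.
import theano
import theano.tensor as T
import lasagne

def train_sketch(layer_output, input_var, target_var, learning_rate=1e-3):
    prediction = lasagne.layers.get_output(layer_output)          # (batch, 1), sigmoid output
    loss = lasagne.objectives.binary_crossentropy(
        T.flatten(prediction), T.cast(target_var, theano.config.floatX)).mean()
    params = lasagne.layers.get_all_params(layer_output, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, target_var], loss, updates=updates)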