def create_lstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=False):
    """Build a forward, masked LSTM layer.

    Args:
        l_incoming: layer feeding the LSTM.
        l_mask: layer supplying the sequence mask (passed as ``mask_input``).
        hidden_units: number of LSTM units.
        cell_parameters: ``Gate`` describing the cell input; ``None`` selects
            a tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: name given to the layer.
        use_peepholes: forwarded to ``LSTMLayer`` as ``peepholes``.

    Returns:
        The constructed ``LSTMLayer``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    return LSTMLayer(
        l_incoming,
        hidden_units,
        peepholes=use_peepholes,
        # The mask arrives through its own input layer.
        mask_input=l_mask,
        # One Gate specification is reused for the three gates.
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
        name=name,
    )
# Example source code using the Python class Gate()
def create_lstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Build a forward, masked LSTM layer named ``f_<name>``.

    Args:
        l_incoming: layer feeding the LSTM.
        l_mask: layer supplying the sequence mask (passed as ``mask_input``).
        hidden_units: number of LSTM units.
        cell_parameters: ``Gate`` describing the cell input; ``None`` selects
            a tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix used to form the layer name ``f_<name>``.

    Returns:
        The constructed ``LSTMLayer``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    return LSTMLayer(
        l_incoming,
        hidden_units,
        # The mask arrives through its own input layer.
        mask_input=l_mask,
        # One Gate specification is reused for the three gates.
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
        name='f_{}'.format(name),
    )
def create_lstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Create the forward masked LSTM layer ``f_<name>``.

    Args:
        l_incoming: layer feeding the LSTM.
        l_mask: layer supplying the sequence mask (passed as ``mask_input``).
        hidden_units: number of LSTM units.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix used to form the layer name ``f_<name>``.

    Returns:
        The constructed ``LSTMLayer``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    layer_kwargs = dict(
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
        name='f_{}'.format(name),
    )
    return LSTMLayer(l_incoming, hidden_units, **layer_kwargs)
def create_lstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=True):
    """Build a forward, masked LSTM layer named ``f_<name>``.

    Args:
        l_incoming: layer feeding the LSTM.
        l_mask: layer supplying the sequence mask (passed as ``mask_input``).
        hidden_units: number of LSTM units.
        cell_parameters: ``Gate`` describing the cell input; ``None`` selects
            a tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix used to form the layer name ``f_<name>``.
        use_peepholes: forwarded to ``LSTMLayer`` as ``peepholes``.

    Returns:
        The constructed ``LSTMLayer``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    return LSTMLayer(
        l_incoming,
        hidden_units,
        peepholes=use_peepholes,
        # The mask arrives through its own input layer.
        mask_input=l_mask,
        # One Gate specification is reused for the three gates.
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
        name='f_{}'.format(name),
    )
def create_lstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=True):
    """Create the forward masked LSTM layer ``f_<name>``.

    Args:
        l_incoming: layer feeding the LSTM.
        l_mask: layer supplying the sequence mask (passed as ``mask_input``).
        hidden_units: number of LSTM units.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix used to form the layer name ``f_<name>``.
        use_peepholes: forwarded to ``LSTMLayer`` as ``peepholes``.

    Returns:
        The constructed ``LSTMLayer``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    layer_kwargs = dict(
        mask_input=l_mask,
        peepholes=use_peepholes,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
        name='f_{}'.format(name),
    )
    return LSTMLayer(l_incoming, hidden_units, **layer_kwargs)
def exe_maxru(length, num_units, position, binominal):
    """Build a MAXRU network with one sigmoid output unit and train it.

    Args:
        length: sequence length; used for the input shape and as
            ``max_length`` of the MAXRU layer.
        num_units: number of units in the MAXRU layer.
        position, binominal: passed through to ``train`` unchanged.

    Returns:
        Whatever ``train`` returns.
    """
    def plain_gate(with_cell_weights):
        # Gate with Glorot-uniform weights; W_cell is optional.
        cell_init = lasagne.init.GlorotUniform() if with_cell_weights else None
        return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                    W_cell=cell_init)

    def tanh_update_gate():
        # Candidate-style gate: zero bias, tanh, no cell weights.
        return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                    W_cell=None, b=lasagne.init.Constant(0.),
                    nonlinearity=nonlinearities.tanh)

    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')
    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = plain_gate(False)
    time_update = tanh_update_gate()
    resetgate = plain_gate(True)
    updategate = plain_gate(True)
    hidden_update = tanh_update_gate()

    layer_taru = MAXRULayer(
        layer_input, num_units, max_length=length,
        P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
        resetgate=resetgate, updategate=updategate, hidden_update=hidden_update,
        time_updategate=time_updategate, time_update=time_update,
        only_return_final=True, name='MAXRU', p=0.)

    # Disabled debugging probes, kept for reference:
    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    """Build an LSTM network with one sigmoid output unit and train it.

    Args:
        use_embedd: when true, the layer returned by
            ``construct_position_input`` is concatenated to the input on axis 2.
        length: sequence length used for the input layer shape.
        num_units: number of LSTM units.
        position, binominal: passed through to ``train`` unchanged.

    Returns:
        Whatever ``train`` returns.
    """
    def glorot_gate(**extra):
        # All gates share Glorot-uniform input and hidden weights.
        return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), **extra)

    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1))
    outgate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1))
    # Following Jozefowicz et al. (2015), the forget-gate bias starts at 1.
    forgetgate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # The cell uses tanh for now; a purely linear cell is still to be tried.
    cell = glorot_gate(W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(
        layer_input, num_units,
        ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
        peepholes=False, nonlinearity=nonlinearities.tanh,
        only_return_final=True, name='LSTM')

    # Disabled debugging probes, kept for reference:
    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
    """Build a GRU network with one sigmoid output unit and train it.

    Args:
        use_embedd: when true, the layer returned by
            ``construct_position_input`` is concatenated to the input on axis 2.
        length: sequence length used for the input layer shape.
        num_units: number of GRU units.
        position, binominal: passed through to ``train`` unchanged.
        reset_input: forwarded to ``GRULayer_ANA``.

    Returns:
        Whatever ``train`` returns.
    """
    def glorot_gate(**extra):
        # GRU gates here never use cell weights.
        return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                    W_cell=None, **extra)

    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate = glorot_gate()
    updategate = glorot_gate()
    hidden_update = glorot_gate(b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_gru = GRULayer_ANA(
        layer_input, num_units,
        resetgate=resetgate, updategate=updategate, hidden_update=hidden_update,
        reset_input=reset_input, only_return_final=True, name='GRU')

    # Disabled debugging probes, kept for reference:
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)
def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(),
             nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0,
             unroll_scan=False, precompute_input=True, mask_input=None, **kwargs):
    """Initialise the encoder by forwarding every argument to the parent.

    All parameters are handed to the parent constructor positionally and
    unchanged, with one extra literal ``False`` inserted after
    ``mask_input``.
    NOTE(review): that slot is presumably ``only_return_final`` of the
    parent LSTM layer -- confirm against the base class signature.
    """
    # Positional order is part of the contract with the parent constructor.
    parent_args = (
        incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
        cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
        grad_clipping, unroll_scan, precompute_input, mask_input,
        False,
    )
    super(CustomLSTMEncoder, self).__init__(*parent_args, **kwargs)
def test_lnlstm_passthrough():
    """Build an LNLSTMLayer wired as a pass-through and construct its output.

    The value comparison is currently disabled, so this only checks that
    the layer and its output expression can be constructed.
    """
    float_type = theano.config.floatX
    zero = lasagne.init.Constant(0.)
    one = lasagne.init.Constant(1.)

    l_in = InputLayer((4, 5, 6))
    # Gate arguments are positional: W_in, W_hid, W_cell, b, nonlinearity.
    open_gate = Gate(zero, zero, zero, one, None)
    closed_gate = Gate(zero, zero, zero, zero, None)
    identity_cell = Gate(np.eye(6).astype(float_type), zero, zero, zero, None)

    l_rec = LNLSTMLayer(l_in, 6, open_gate, closed_gate, identity_cell, open_gate, None)
    out = lasagne.layers.get_output(l_rec)
    inp = np.arange(4 * 5 * 6).reshape(4, 5, 6).astype(float_type)
    # np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp)
def test_lstm_passthrough():
    """Check that an LSTMLayer wired as a pass-through reproduces its input."""
    float_type = theano.config.floatX
    zero = lasagne.init.Constant(0.)
    one = lasagne.init.Constant(1.)

    l_in = InputLayer((4, 5, 6))
    # Gate arguments are positional: W_in, W_hid, W_cell, b, nonlinearity.
    open_gate = Gate(zero, zero, zero, one, None)
    closed_gate = Gate(zero, zero, zero, zero, None)
    identity_cell = Gate(np.eye(6).astype(float_type), zero, zero, zero, None)

    l_rec = LSTMLayer(l_in, 6, open_gate, closed_gate, identity_cell, open_gate, None)
    out = lasagne.layers.get_output(l_rec)

    inp = np.arange(4 * 5 * 6).reshape(4, 5, 6).astype(float_type)
    result = out.eval({l_in.input_var: inp})
    np.testing.assert_almost_equal(result, inp)
def generate_lstm_parameters():
    """Create the gate and cell ``Gate`` specifications for an LSTM.

    Returns:
        Tuple ``(gate_parameters, cell_parameters)``. Both use orthogonal
        input/hidden weights and a zero bias; the cell additionally has
        ``W_cell=None`` and a tanh nonlinearity.
    """
    gates = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.),
    )
    cell = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        # W_cell=None means no cell connection is used.
        W_cell=None,
        b=las.init.Constant(0.),
        # The cell nonlinearity of an LSTM is tanh by convention.
        nonlinearity=tanh,
    )
    return gates, cell
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=False):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.
        use_peepholes: forwarded to both layers as ``peepholes``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        peepholes=use_peepholes,
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.Orthogonal()):
    """Build a masked bidirectional-LSTM sequence classifier.

    The forward and backward LSTM outputs are summed, sliced with
    ``SliceLayer(l_sum, -1, 1)`` and fed to a softmax dense layer.

    Args:
        input_shape, input_var: shape and variable for the data input layer.
        mask_shape, mask_var: shape and variable for the mask input layer.
        lstm_size: number of units in each LSTM direction.
        output_classes: number of softmax outputs.
        w_init: initializer for the input and hidden weights of every gate.

    Returns:
        The softmax output layer.
    """
    # Input, forget and output gates share one specification.
    gate_parameters = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    # Cell: no cell connection (W_cell=None) and the conventional tanh.
    cell_parameters = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                           b=las.init.Constant(0.), nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    forward, backward = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')
    l_sum = ElemwiseSumLayer([forward, backward], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # A single dense softmax layer predicts the class of the sequence.
    return DenseLayer(l_forward_slice1, num_units=output_classes,
                      nonlinearity=las.nonlinearities.softmax, name='output')
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Create the forward/backward pair of masked LSTM layers.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()

    def build(prefix, **direction):
        # Both directions differ only in name prefix and ``backwards``.
        return LSTMLayer(
            l_incoming, hidden_units,
            mask_input=l_mask,
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            learn_init=True, grad_clipping=5.,
            name='{}_{}'.format(prefix, name), **direction)

    l_lstm = build('f')
    l_lstm_back = build('b', backwards=True)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=True):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.
        use_peepholes: forwarded to both layers as ``peepholes``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        mask_input=l_mask,
        peepholes=use_peepholes,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Create the forward/backward pair of masked LSTM layers.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()

    def build(prefix, **direction):
        # Both directions differ only in name prefix and ``backwards``.
        return LSTMLayer(
            l_incoming, hidden_units,
            mask_input=l_mask,
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            learn_init=True, grad_clipping=5.,
            name='{}_{}'.format(prefix, name), **direction)

    l_lstm = build('f')
    l_lstm_back = build('b', backwards=True)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=True):
    """Create the forward/backward pair of masked LSTM layers.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.
        use_peepholes: forwarded to both layers as ``peepholes``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()

    def build(prefix, **direction):
        # Both directions differ only in name prefix and ``backwards``.
        return LSTMLayer(
            l_incoming, hidden_units,
            mask_input=l_mask, peepholes=use_peepholes,
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            learn_init=True, grad_clipping=5.,
            name='{}_{}'.format(prefix, name), **direction)

    l_lstm = build('f')
    l_lstm_back = build('b', backwards=True)
    return l_lstm, l_lstm_back
def create_model(input_shape, input_var, mask_shape, mask_var, window, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    """Build a per-timestep softmax classifier on top of a delta layer and an LSTM.

    Args:
        input_shape, input_var: shape and variable for the data input layer.
        mask_shape, mask_var: shape and variable for the mask input layer.
        window: passed to ``DeltaLayer``.
        lstm_size: number of LSTM units (per direction when bidirectional).
        output_classes: number of softmax outputs.
        w_init: initializer for the input and hidden weights of every gate.
        use_peepholes: forwarded to ``create_blstm`` / ``create_lstm``.
        use_blstm: when true, sum a forward and a backward LSTM; otherwise
            use a single LSTM from ``create_lstm``.

    Returns:
        The output layer, reshaped to
        ``(-1, symbolic_seqlen, output_classes)``.
    """
    # Input, forget and output gates share one specification.
    gate_parameters = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    # Cell: no cell connection (W_cell=None) and the conventional tanh.
    cell_parameters = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                           b=las.init.Constant(0.), nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]
    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        forward, backward = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters,
                                         'lstm', use_peepholes)
        recurrent = ElemwiseSumLayer([forward, backward], name='sum')
    else:
        recurrent = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters,
                                'lstm', use_peepholes)
    # Flatten to (num_examples * seq_len, lstm_size) for the dense layer.
    l_reshape = ReshapeLayer(recurrent, (-1, lstm_size), name='reshape')

    l_softmax = DenseLayer(l_reshape, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax, name='softmax')
    return ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name, use_peepholes=True):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.
        use_peepholes: forwarded to both layers as ``peepholes``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        peepholes=use_peepholes,
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    """Fuse several substream layers and classify each timestep with a BLSTM.

    Args:
        substreams: list of layers to fuse.
        mask_shape, mask_var: shape and variable for the mask input layer.
        lstm_size: number of units in each LSTM direction.
        output_classes: number of softmax outputs.
        fusiontype: ``'adasum'``, ``'sum'`` or ``'concat'``.
        w_init_fn: initializer for the input and hidden weights of every gate.
        use_peepholes: accepted for interface compatibility.
            NOTE(review): this value is not forwarded to ``create_blstm``
            here -- confirm whether it should be.

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, symbolic_seqlen_raw, output_classes)`` and the fusion layer.

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    # Input, forget and output gates share one specification.
    gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    # Cell: no cell connection (W_cell=None) and the conventional tanh.
    cell_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn, W_cell=None,
                           b=las.init.Constant(0.), nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # Merge layers take a list of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')
    else:
        # Fix: previously l_fuse stayed unbound and the function failed
        # later with an UnboundLocalError.
        raise ValueError('unknown fusiontype: {!r}'.format(fusiontype))

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters,
                                          gate_parameters, 'lstm_agg')
    # Forward and backward outputs are combined by summing.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to (num_examples * seq_len, lstm_size) for the dense layer.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')
    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')
    return l_out, l_fuse
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Create the forward/backward pair of masked LSTM layers.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; when ``None`` a tanh
            cell without a cell connection is used.
        gate_parameters: ``Gate`` shared by the input, forget and output
            gates; when ``None`` a default ``Gate()`` is used.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: the fallback used to be a plain Gate(), i.e. gate defaults
        # applied to the cell input. Use the tanh cell with W_cell=None.
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()

    def build(prefix, **direction):
        # Both directions differ only in name prefix and ``backwards``.
        return LSTMLayer(
            l_incoming, hidden_units,
            mask_input=l_mask,
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            learn_init=True, grad_clipping=5.,
            name='{}_{}'.format(prefix, name), **direction)

    l_lstm = build('f')
    l_lstm_back = build('b', backwards=True)
    return l_lstm, l_lstm_back
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):
    """Build a forward and a backward masked LSTM over the same input.

    Args:
        l_incoming: layer feeding both LSTMs.
        l_mask: layer supplying the sequence mask (``mask_input``).
        hidden_units: number of units in each LSTM.
        cell_parameters: ``Gate`` for the cell input; ``None`` selects a
            tanh cell without a cell connection.
        gate_parameters: ``Gate`` used for the input, forget and output
            gates; ``None`` selects a default ``Gate()``.
        name: suffix for the layer names ``f_<name>`` and ``b_<name>``.

    Returns:
        Tuple ``(forward_layer, backward_layer)``.
    """
    if cell_parameters is None:
        # Fix: a bare Gate() carries gate-style defaults. The cell input
        # conventionally uses tanh and has no cell connection (W_cell=None).
        from lasagne.nonlinearities import tanh
        cell_parameters = Gate(W_cell=None, nonlinearity=tanh)
    if gate_parameters is None:
        gate_parameters = Gate()
    # Everything except direction and name is identical for both layers.
    shared = dict(
        mask_input=l_mask,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initial state and clip gradients.
        learn_init=True,
        grad_clipping=5.,
    )
    l_lstm = LSTMLayer(l_incoming, hidden_units, name='f_{}'.format(name), **shared)
    l_lstm_back = LSTMLayer(l_incoming, hidden_units, backwards=True,
                            name='b_{}'.format(name), **shared)
    return l_lstm, l_lstm_back
def build_recur_dropout(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    """Construct a bi-directional LSTM-CNN-CRF network with recurrent dropout.

    Args:
        incoming1: layer fed to the character CNN.
        incoming2: layer concatenated with the pooled CNN output on axis 2.
        num_units: number of units in each LSTM direction.
        num_labels: number of labels for the CRF layer.
        mask: mask layer used by both LSTMs and the CRF.
        grad_clipping: gradient clipping value for both LSTMs.
        num_filters: number of CNN filters.
        p: dropout probability, used by the dropout layers and passed to
            both LSTMs as ``p``.

    Returns:
        The ``ChainCRFLayer`` on top of the network.
    """
    def lstm_gates():
        # Fresh gate specifications for one LSTM direction.
        def glorot_gate(**extra):
            return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), **extra)

        return dict(
            ingate=glorot_gate(W_cell=lasagne.init.Uniform(range=0.1)),
            outgate=glorot_gate(W_cell=lasagne.init.Uniform(range=0.1)),
            # Following Jozefowicz et al. (2015), forget-gate bias starts at 1.
            forgetgate=glorot_gate(W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.)),
            # The cell uses tanh for now; a purely linear cell is still to be tried.
            cell=glorot_gate(W_cell=None, nonlinearity=nonlinearities.tanh),
        )

    conv_window = 3
    # incoming1 shape = [batch, n-step, c_dim, char_length]
    # CNN output shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # Pool across the whole last axis of the CNN output.
    pool_size = cnn_layer.output_shape[3]
    # Pooled shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # Join character features with the second input, then apply dropout.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    incoming = lasagne.layers.DropoutLayer(incoming, p=p, shared_axes=(1,))

    lstm_forward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                             nonlinearity=nonlinearities.tanh, peepholes=False,
                             p=p, name='forward', **lstm_gates())
    lstm_backward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                              nonlinearity=nonlinearities.tanh, peepholes=False, backwards=True,
                              p=p, name='backward', **lstm_gates())

    # Concatenate both directions, apply dropout, and decode with a CRF.
    bi_lstm_cnn = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")
    bi_lstm_cnn = lasagne.layers.DropoutLayer(bi_lstm_cnn, p=p, shared_axes=(1,))
    return ChainCRFLayer(bi_lstm_cnn, num_labels, mask_input=mask)
def build_RNN(architec, layer_input, layer_mask, num_units, grad_clipping):
    """Build the recurrent layer selected by ``architec``.

    Args:
        architec: ``'gru0'`` (GRU, ``reset_input=False``), ``'gru1'`` (GRU,
            ``reset_input=True``), ``'lstm'`` or ``'sgru'``.
        layer_input: layer feeding the recurrent layer.
        layer_mask: mask layer passed as ``mask_input``.
        num_units: number of recurrent units.
        grad_clipping: gradient clipping value.

    Returns:
        The constructed recurrent layer (built with ``only_return_final=True``).

    Raises:
        ValueError: if ``architec`` is not one of the supported names.
    """
    def glorot_gate(**extra):
        # All gates share Glorot-uniform input and hidden weights.
        return Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), **extra)

    def tanh_candidate():
        # Candidate/cell gate: no cell weights, zero bias, tanh.
        return glorot_gate(W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    def make_gru(reset_input):
        resetgate = glorot_gate(W_cell=None)
        updategate = glorot_gate(W_cell=None)
        hidden_update = tanh_candidate()
        return GRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                        resetgate=resetgate, updategate=updategate, hidden_update=hidden_update,
                        reset_input=reset_input, only_return_final=True, p=0.5, name='GRU')

    def make_lstm():
        ingate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1))
        outgate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1))
        # Following Jozefowicz et al. (2015), forget-gate bias starts at 1.
        forgetgate = glorot_gate(W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
        # The cell uses tanh for now; a purely linear cell is still to be tried.
        cell = tanh_candidate()
        return LSTMLayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                         peepholes=False, nonlinearity=nonlinearities.tanh,
                         only_return_final=True, p=0.5, name='LSTM')

    def make_sgru():
        resetgate_hidden = glorot_gate(W_cell=lasagne.init.GlorotUniform())
        resetgate_input = glorot_gate(W_cell=lasagne.init.GlorotUniform())
        updategate = glorot_gate(W_cell=lasagne.init.GlorotUniform())
        hidden_update = tanh_candidate()
        return SGRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         resetgate_input=resetgate_input, resetgate_hidden=resetgate_hidden,
                         updategate=updategate, hidden_update=hidden_update,
                         only_return_final=True, p=0.5, name='SGRU')

    if architec == 'gru0':
        return make_gru(False)
    if architec == 'gru1':
        return make_gru(True)
    if architec == 'lstm':
        return make_lstm()
    if architec == 'sgru':
        return make_sgru()
    raise ValueError('unkown architecture: %s' % architec)
def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    """Build a bidirectional GRU over CNN features with standard dropout.

    ``incoming1`` goes through dropout, a width-3 time-step convolution and a
    max pool spanning the whole convolution output; the result is concatenated
    with ``incoming2`` on axis 2. A forward and a backward ``GRULayer`` read
    that merged input, their outputs are concatenated, dropped out, flattened
    to two dimensions and fed to a softmax ``DenseLayer``.

    Args:
        incoming1: layer feeding the CNN branch (the original author's comments
            give its shape as [batch, n-step, c_dim, char_length]).
        incoming2: layer concatenated with the CNN output on axis 2.
        num_units: number of units for each GRU direction.
        num_labels: number of units of the softmax output layer.
        mask: mask layer passed to both GRUs as ``mask_input``.
        grad_clipping: forwarded to both GRUs.
        num_filters: number of convolution filters.
        p: dropout probability applied to ``incoming1`` and to the
            concatenated GRU output.
        reset_input: forwarded to both GRUs.

    Returns:
        The softmax ``DenseLayer`` (named 'softmax').
    """

    def _fresh_gates():
        # Each call builds brand-new Gate objects, so the forward and backward
        # layers never receive the same Gate instances.
        glorot = lasagne.init.GlorotUniform
        return dict(
            resetgate=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None),
            updategate=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None),
            hidden_update=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None,
                               nonlinearity=nonlinearities.tanh)
        )

    # --- CNN branch -------------------------------------------------------
    # expected shape: [batch, n-step, c_dim, char_length]
    cnn_input = lasagne.layers.DropoutLayer(incoming1, p=p)
    # expected shape: [batch, n-step, c_filters, output_length]
    conv = ConvTimeStep1DLayer(cnn_input, num_filters=num_filters, filter_size=3, pad='full',
                               nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # The pool must cover the full last axis of the convolution output.
    _batch, _steps, _filters, conv_length = conv.output_shape
    pooled = PoolTimeStep1DLayer(conv, pool_size=conv_length)
    # Drop the trailing singleton axis: keep only the first three dimensions.
    cnn_features = lasagne.layers.reshape(pooled, ([0], [1], [2]))

    # --- merge with the second input ---------------------------------------
    merged = lasagne.layers.concat([cnn_features, incoming2], axis=2)
    # NOTE(review): this dropout rate is fixed at 0.2 and does not use ``p`` --
    # confirm that is intentional.
    merged = lasagne.layers.DropoutLayer(merged, p=0.2)

    # --- bidirectional GRU --------------------------------------------------
    forward_rnn = GRULayer(merged, num_units, mask_input=mask, grad_clipping=grad_clipping,
                           reset_input=reset_input, name='forward', **_fresh_gates())
    backward_rnn = GRULayer(merged, num_units, mask_input=mask, backwards=True,
                            grad_clipping=grad_clipping, reset_input=reset_input, name='backward',
                            **_fresh_gates())

    # Join both directions on the feature axis, then apply dropout.
    both_directions = lasagne.layers.concat([forward_rnn, backward_rnn], axis=2, name="bi-gru")
    both_directions = lasagne.layers.DropoutLayer(both_directions, p=p)
    # Collapse every leading axis into one, keeping the feature axis (axis 2).
    flattened = lasagne.layers.reshape(both_directions, (-1, [2]))

    # --- softmax output -----------------------------------------------------
    return lasagne.layers.DenseLayer(flattened, num_units=num_labels,
                                     nonlinearity=nonlinearities.softmax, name='softmax')
def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    """Build a bidirectional SGRU over CNN features with standard dropout.

    ``incoming1`` goes through dropout, a width-3 time-step convolution and a
    max pool spanning the whole convolution output; the result is concatenated
    with ``incoming2`` on axis 2. A forward and a backward ``SGRULayer`` read
    that merged input, their outputs are concatenated, dropped out, flattened
    to two dimensions and fed to a softmax ``DenseLayer``.

    Args:
        incoming1: layer feeding the CNN branch (the original author's comments
            give its shape as [batch, n-step, c_dim, char_length]).
        incoming2: layer concatenated with the CNN output on axis 2.
        num_units: number of units for each SGRU direction.
        num_labels: number of units of the softmax output layer.
        mask: mask layer passed to both SGRUs as ``mask_input``.
        grad_clipping: forwarded to both SGRUs.
        num_filters: number of convolution filters.
        p: dropout probability applied to ``incoming1`` and to the
            concatenated SGRU output.

    Returns:
        The softmax ``DenseLayer`` (named 'softmax').
    """

    def _fresh_gates():
        # Each call builds brand-new Gate objects, so the forward and backward
        # layers never receive the same Gate instances.
        glorot = lasagne.init.GlorotUniform
        return dict(
            resetgate_input=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None),
            resetgate_hidden=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None),
            updategate=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None),
            hidden_update=Gate(W_in=glorot(), W_hid=glorot(), W_cell=None,
                               nonlinearity=nonlinearities.tanh)
        )

    # --- CNN branch -------------------------------------------------------
    # expected shape: [batch, n-step, c_dim, char_length]
    cnn_input = lasagne.layers.DropoutLayer(incoming1, p=p)
    # expected shape: [batch, n-step, c_filters, output_length]
    conv = ConvTimeStep1DLayer(cnn_input, num_filters=num_filters, filter_size=3, pad='full',
                               nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # The pool must cover the full last axis of the convolution output.
    _batch, _steps, _filters, conv_length = conv.output_shape
    pooled = PoolTimeStep1DLayer(conv, pool_size=conv_length)
    # Drop the trailing singleton axis: keep only the first three dimensions.
    cnn_features = lasagne.layers.reshape(pooled, ([0], [1], [2]))

    # --- merge with the second input ---------------------------------------
    merged = lasagne.layers.concat([cnn_features, incoming2], axis=2)
    # NOTE(review): this dropout rate is fixed at 0.2 and does not use ``p`` --
    # confirm that is intentional.
    merged = lasagne.layers.DropoutLayer(merged, p=0.2)

    # --- bidirectional SGRU -------------------------------------------------
    forward_rnn = SGRULayer(merged, num_units, mask_input=mask, grad_clipping=grad_clipping,
                            name='forward', **_fresh_gates())
    backward_rnn = SGRULayer(merged, num_units, mask_input=mask, backwards=True,
                             grad_clipping=grad_clipping, name='backward', **_fresh_gates())

    # Join both directions on the feature axis, then apply dropout.
    both_directions = lasagne.layers.concat([forward_rnn, backward_rnn], axis=2, name="bi-sgru")
    both_directions = lasagne.layers.DropoutLayer(both_directions, p=p)
    # Collapse every leading axis into one, keeping the feature axis (axis 2).
    flattened = lasagne.layers.reshape(both_directions, (-1, [2]))

    # --- softmax output -----------------------------------------------------
    return lasagne.layers.DenseLayer(flattened, num_units=num_labels,
                                     nonlinearity=nonlinearities.softmax, name='softmax')