python类Gate()的实例源码-第2页-面圈网

bi-rnn-cnn.py 文件源码项目：NeuroNLP 作者: XuezheMax 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask, resetgate=resetgate_forward,
                           updategate=updategate_forward, hidden_update=hidden_update_forward,
                           grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True, resetgate=resetgate_backward,
                            updategate=updategate_backward, hidden_update=hidden_update_backward,
                            grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")
    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

bi-rnn-cnn.py 文件源码项目：NeuroNLP 作者: XuezheMax 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, p=p, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")
    # shape = [batch, n-step, num_units]
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

custom_layers.py 文件源码项目：MachineComprehension 作者: sa-j 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(),
                 nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.),
                 backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0,
                 precompute_input=True, mask_input=None,
                 encoder_mask_input=None, attention=False, word_by_word=False, **kwargs):
        super(CustomLSTMDecoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                                cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                                grad_clipping, False, precompute_input, mask_input, True,
                                                **kwargs)
        self.attention = attention
        self.word_by_word = word_by_word
        # encoder mask
        self.encoder_mask_incoming_index = -1
        if encoder_mask_input is not None:
            self.input_layers.append(encoder_mask_input)
            self.input_shapes.append(encoder_mask_input.output_shape)
            self.encoder_mask_incoming_index = len(self.input_layers) - 1
        # check encoder
        if not isinstance(self.cell_init, CustomLSTMEncoder) \
                or self.num_units != self.cell_init.num_units:
            raise ValueError('cell_init must be CustomLSTMEncoder'
                             ' and num_units should equal')
        self.r_init = None
        self.r_init = self.add_param(init.Constant(0.),
                                     (1, num_units), name="r_init",
                                     trainable=False, regularizable=False)
        if self.word_by_word:
            # rewrites
            self.attention = True
        if self.attention:
            if not isinstance(encoder_mask_input, lasagne.layers.Layer):
                raise ValueError('Attention mechnism needs encoder mask layer')
            # initializes attention weights
            self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'V_pointer')
            self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
            # doesn't need transpose
            self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
            self.W_p_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_p_attend')
            self.W_x_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_x_attend')
            if self.word_by_word:
                self.W_r_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_r_attend')
                self.W_t_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_t_attend')

networks.py 文件源码项目：LasagneNLP 作者: XuezheMax 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def build_BiLSTM(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, peepholes=False, dropout=True,
                 in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                            nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                            precompute_input=precompute_input,
                                            ingate=ingate_forward, outgate=outgate_forward,
                                            forgetgate=forgetgate_forward, cell=cell_forward, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                             nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                             precompute_input=precompute_input, backwards=True,
                                             ingate=ingate_backward, outgate=outgate_backward,
                                             forgetgate=forgetgate_backward, cell=cell_backward, name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat

networks.py 文件源码项目：LasagneNLP 作者: XuezheMax 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward grus. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                          nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                           nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat

deltanet_majority_vote.py 文件源码项目：ip-avsr 作者: lzuwei 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, w_init_fn=GlorotUniform, use_peepholes=False, use_blstm=True):

    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                           use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out

baseline_end2end.py 文件源码项目：ip-avsr 作者: lzuwei 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26):

    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W.astype('float32'))
    weights.append(dbn_layers[2].W.astype('float32'))
    weights.append(dbn_layers[3].W.astype('float32'))
    weights.append(dbn_layers[4].W.astype('float32'))
    biases.append(dbn_layers[1].b.astype('float32'))
    biases.append(dbn_layers[2].b.astype('float32'))
    biases.append(dbn_layers[3].b.astype('float32'))
    biases.append(dbn_layers[4].b.astype('float32'))

    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    # l_delta = DeltaLayer(l_reshape2, win, name='delta')

    # l_lstm = create_lstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    l_lstm, l_lstm_back = create_blstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out

avnet.py 文件源码项目：ip-avsr 作者: lzuwei 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=T.iscalar('theta'), nonlinearity=rectify,
                                w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_'+name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_'+name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_'+name, 'fc2_'+name, 'fc3_'+name, 'bottleneck_'+name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(l_encoder_raw,
                                  (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                                  name='reshape2_'+name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_'+name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_'+name)

    return l_lstm