def MDBLOCK(incoming, num_filters, scales, name, nonlinearity):
    return NL(BN(ESL([incoming,
                      MDCL(NL(BN(MDCL(NL(BN(incoming, name=name+'bnorm0'), nonlinearity),
                                      num_filters, scales, name),
                                 name=name+'bnorm1'), nonlinearity),
                           num_filters,
                           scales,
                           name+'2')]),
                 name=name+'bnorm2'), nonlinearity)
# Gaussian Sample Layer for VAE from Tencia Lee
def GL(mu, ls):
    return [GSL(z_mu, z_ls) for z_mu, z_ls in zip(mu, ls)]
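# GSL is referenced above but not defined in this excerpt. A minimal sketch of such a
# Gaussian sample (reparameterization) layer, assuming mu/log-sigma inputs, might look
# like the following -- the exact layer used in the original code may differ.
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

class GaussianSampleLayer(lasagne.layers.MergeLayer):
    """Samples z = mu + exp(log_sigma) * eps with eps ~ N(0, 1)."""
    def __init__(self, mu, log_sigma, **kwargs):
        self.rng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        super(GaussianSampleLayer, self).__init__([mu, log_sigma], **kwargs)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, inputs, deterministic=False, **kwargs):
        mu, log_sigma = inputs
        if deterministic:
            # Return the mean when sampling is disabled (e.g. at test time).
            return mu
        return mu + T.exp(log_sigma) * self.rng.normal(mu.shape)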
# Convenience function to return a residual layer. It's not really that much more convenient than ESL'ing,
# but I like being able to see when I'm using Residual connections as opposed to Elemwise-sums
def ResLayer(incoming, IB, nonlinearity):
    return NL(ESL([IB, incoming]), nonlinearity)
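# Usage sketch for ResLayer (the filter size and nonlinearity here are assumptions):
# the inner block IB must preserve the shape of `incoming` so the elementwise sum is valid.
def example_reslayer(l_prev, name='res1'):
    inner = lasagne.layers.Conv2DLayer(l_prev,
                                       num_filters=lasagne.layers.get_output_shape(l_prev)[1],
                                       filter_size=3, pad='same', nonlinearity=None,
                                       name=name + '_conv')
    return ResLayer(l_prev, inner, lasagne.nonlinearities.rectify)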
# Inverse autoregressive flow layer
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        # Flatten all trailing dimensions into a single feature dimension
        input = input.flatten(2)
    activation = T.dot(input, self.W * self.weights_mask)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
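# The forward pass above multiplies self.W by self.weights_mask; a minimal sketch of how
# such an autoregressive mask could be built (this construction is an assumption, not the
# original project's helper):
def build_autoregressive_mask(n_units):
    # mask[i, j] = 1 only when j > i, so output unit j depends only on inputs 0..j-1.
    return np.triu(np.ones((n_units, n_units), dtype=theano.config.floatX), k=1)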
# Stripped-Down Direct Input masked layer: Combine this with ESL and a masked layer to get a true DIML.
# Consider making this a simultaneous subclass of MaskedLayer and elemwise sum layer for cleanliness
# adopted from M.Germain
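# A sketch of the combination described above (the argument layers are assumptions):
# sum a masked hidden layer with the stripped-down direct-input masked layer, then apply
# the nonlinearity, to get the full DIML behaviour.
def diml_combine(masked_hidden, direct_input, nonlinearity):
    return NL(ESL([masked_hidden, direct_input]), nonlinearity)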
def make_block(self, name, input, units):
    self.make_layer(name+'-A', input, units, alpha=0.1)
    # self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0)
    return ElemwiseSumLayer([input, self.last_layer()]) if args.generator_residual else self.last_layer()
def getTrainedRNN():
    '''Read the params from file and set them (To Do: refactor so this is done only once).'''
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes + 1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5
    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    # Unnecessary in this version; just collecting the shape info so that we can reshape the output back to the original shape.
    n_batch, n_time_steps, n_features = l_in.input_var.shape
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size, nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size, nonlinearity=lasagne.nonlinearities.linear)
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))  # reshaping back
    l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax, (n_batch, n_time_steps, output_size))
    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values, trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
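# Usage sketch for getTrainedRNN (assumes 'CTC_model.npz' is present and theano is imported):
# compile the symbolic softmax output into a callable predictor over
# (n_batch, n_time_steps, 39)-shaped batches.
def compileTrainedRNNPredictor():
    l_in, output = getTrainedRNN()
    return theano.function([l_in.input_var], output)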
def resnet_block(input_, filter_size, num_filters,
                 activation=relu, downsample=False,
                 no_output_act=True,
                 use_shortcut=False,
                 use_wn=False,
                 W_init=Normal(0.02),
                 **kwargs):
    """
    Resnet block layer.
    """
    normalization = weight_norm if use_wn else batch_norm
    block = []
    _stride = 2 if downsample else 1
    # Conv -> BN -> ReLU
    block.append(normalization(conv_layer(input_, filter_size, num_filters,
                                          _stride, 'same', nonlinearity=activation,
                                          W=W_init)))
    # Conv -> BN
    block.append(normalization(conv_layer(block[-1], filter_size, num_filters, 1, 'same',
                                          nonlinearity=None, W=W_init)))
    if downsample or use_shortcut:
        shortcut = conv_layer(input_, 1, num_filters, _stride, 'valid', nonlinearity=None)
        block.append(ElemwiseSumLayer([shortcut, block[-1]]))
    else:
        block.append(ElemwiseSumLayer([input_, block[-1]]))
    if not no_output_act:
        block.append(NonlinearityLayer(block[-1], nonlinearity=activation))
    return block[-1]
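# Usage sketch for resnet_block (conv_layer, relu and the incoming feature layer are assumed
# to be defined as in the surrounding project): project channels with a 1x1 shortcut first,
# then stack, downsampling once.
def example_resnet_stack(l_features):
    net = resnet_block(l_features, 3, 64, use_shortcut=True)
    net = resnet_block(net, 3, 64)
    net = resnet_block(net, 3, 128, downsample=True)
    return net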
def residual_block(resnet_in, num_styles=None, num_filters=None, filter_size=3, stride=1):
    if num_filters is None:
        num_filters = resnet_in.output_shape[1]
    conv1 = style_conv_block(resnet_in, num_styles, num_filters, filter_size, stride)
    conv2 = style_conv_block(conv1, num_styles, num_filters, filter_size, stride, linear)
    res_block = ElemwiseSumLayer([conv2, resnet_in])
    return res_block
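# Usage sketch for residual_block (style_conv_block and the transform-net input are assumed
# from the surrounding style-transfer code): the blocks are typically stacked on a
# fixed-width feature map, so num_filters can be inferred from the input.
def example_residual_stack(net, num_styles, n_blocks=5):
    for _ in range(n_blocks):
        net = residual_block(net, num_styles)
    return net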
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.Orthogonal()):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')
    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')
    return l_out
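# Training sketch for the model above (the loss and optimizer choices are assumptions, not
# taken from the original project): one integer label per sequence, matching the sliced
# softmax output.
def compile_train_fn(network, input_var, mask_var, learning_rate=1e-4):
    targets = T.ivector('targets')
    prediction = las.layers.get_output(network)
    loss = las.objectives.categorical_crossentropy(prediction, targets).mean()
    params = las.layers.get_all_params(network, trainable=True)
    updates = las.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, mask_var, targets], loss, updates=updates)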
def create_model(input_shape, input_var, mask_shape, mask_var, window, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]
    l_delta = DeltaLayer(l_in, window, name='delta')
    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')
    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
    return l_out
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen = l_in.input_var.shape[1]
    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')
    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
    return l_out
def MDCL(incoming, num_filters, scales, name, dnn=True):
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D
    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)
    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0 / (1 + len(scales)))
    # Number of incoming channels
    ni = lasagne.layers.get_output_shape(incoming)[1]
    # Weight parameter--the primary parameter for this block
    W = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, lasagne.layers.get_output_shape(incoming)[1], 3, 3))), name=name+'W')
    # Primary convolution layer--no dilation
    n = C2D(incoming=incoming,
            num_filters=num_filters,
            filter_size=[3, 3],
            stride=[1, 1],
            pad=(1, 1),
            W=W * theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_base').dimshuffle(0, 'x', 'x', 'x'),  # Note the broadcasting dimshuffle for the num_filter scalars.
            b=None,
            nonlinearity=None,
            name=name+'base')
    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for i, scale in enumerate(scales):
        # I don't think 0 dilation is technically defined (or if it is, it's just the regular filter), but I use it here as a convenient keyword to grab the 1x1 mean conv.
        if scale == 0:
            nd.append(C2D(incoming=incoming,
                          num_filters=num_filters,
                          filter_size=[1, 1],
                          stride=[1, 1],
                          pad=(0, 0),
                          W=T.mean(W, axis=[2, 3]).dimshuffle(0, 1, 'x', 'x') * theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_1x1').dimshuffle(0, 'x', 'x', 'x'),
                          b=None,
                          nonlinearity=None,
                          name=name+str(scale)))
        # Note the dimshuffles in this layer--these are critical, as the current DilatedConv2D implementation uses a backward pass.
        else:
            nd.append(lasagne.layers.DilatedConv2DLayer(incoming=lasagne.layers.PadLayer(incoming=incoming, width=(scale, scale)),
                                                        num_filters=num_filters,
                                                        filter_size=[3, 3],
                                                        dilation=(scale, scale),
                                                        W=W.dimshuffle(1, 0, 2, 3) * theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_'+str(scale)).dimshuffle('x', 0, 'x', 'x'),
                                                        b=None,
                                                        nonlinearity=None,
                                                        name=name+str(scale)))
    return ESL(nd + [n])
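# Usage sketch for MDCL/MDBLOCK (the channel count, scales and nonlinearity are assumptions):
# scale 0 selects the 1x1 mean-filter branch, while the other entries are dilation rates
# that reuse the shared weight W.
def example_mdblock(batch_size=None, channels=64, rows=32, cols=32):
    l_in_feat = lasagne.layers.InputLayer((batch_size, channels, rows, cols))
    return MDBLOCK(l_in_feat, num_filters=channels, scales=[0, 2, 4],
                   name='mdb_example', nonlinearity=lasagne.nonlinearities.rectify)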
# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform(), use_peepholes=False, use_blstm=True):
    weights, biases, shapes, nonlinearities = dbn
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')
    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                           use_peepholes)
        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')
    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
    return l_out
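# create_pretrained_encoder is not shown in this excerpt. A minimal sketch consistent with
# how it is called above -- stacking dense layers initialized from the pretrained DBN
# weights -- could look like this; the original helper's exact behaviour is an assumption
# (the next snippet calls an older (weights, biases, incoming) variant).
def create_pretrained_encoder(incoming, weights, biases, shapes, nonlinearities, names):
    layer = incoming
    for W, b, num_units, nonlinearity, name in zip(weights, biases, shapes, nonlinearities, names):
        layer = DenseLayer(layer, num_units=num_units, W=W, b=b,
                           nonlinearity=nonlinearity, name=name)
    return layer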
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26):
    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W.astype('float32'))
    weights.append(dbn_layers[2].W.astype('float32'))
    weights.append(dbn_layers[3].W.astype('float32'))
    weights.append(dbn_layers[4].W.astype('float32'))
    biases.append(dbn_layers[1].b.astype('float32'))
    biases.append(dbn_layers[2].b.astype('float32'))
    biases.append(dbn_layers[3].b.astype('float32'))
    biases.append(dbn_layers[4].b.astype('float32'))
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    # l_delta = DeltaLayer(l_reshape2, win, name='delta')
    # l_lstm = create_lstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    l_lstm, l_lstm_back = create_blstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')
    return l_out
def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'), output_classes=26,
                                          w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          [2000, 1000, 500, 50],
                                          [nonlinearities, nonlinearities, nonlinearities, linear],
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')
    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                       use_peepholes)
    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')
    # Now we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use the number of classes as the output size.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')
    return l_out