def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):
    input = Input(shape=(lembedding.size,), dtype='int32')
    if lembedding.vector_box.W is None:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        W_constraint=None)(input)
    else:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        weights=[lembedding.vector_box.W], W_constraint=None)(input)
    emb.trainable = train_vectors

    if unit == 'gru':
        forward = GRU(rnn_size)(emb)
        backward = GRU(rnn_size, go_backwards=True)(emb)
    else:
        forward = LSTM(rnn_size)(emb)
        backward = LSTM(rnn_size, go_backwards=True)(emb)

    merged_rnn = merge([forward, backward], mode='concat')
    dropped = Dropout(0.5)(merged_rnn)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def __init__(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
             nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True,
             optimizer=None):
    if not isinstance(lembedding, TwoLevelsEmbedding):
        raise LanguageClassifierException(
            "The model only accepts two-level language embeddings")
    if num_classes < 2:
        raise LanguageClassifierException("Classes must be 2 or more")

    self.optimizer = optimizer
    model = self._generate_model(lembedding, num_classes, ngrams,
                                 nfilters, rnn_type, rnn_dim, train_vectors)
    super(RCNNClassifier, self).__init__(model, self.optimizer)
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
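A minimal usage sketch (an assumption, not part of the test file above): a test decorated with rnn_test receives each recurrent class in turn through pytest's parametrization, so a single test body exercises SimpleRNN, GRU, and LSTM. The test name and hidden size below are illustrative only.

@rnn_test
def test_layer_construction(layer_class):
    # layer_class is one of recurrent.SimpleRNN, recurrent.GRU, recurrent.LSTM
    layer = layer_class(8, return_sequences=True)
    assert layer.return_sequences is True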
def build_lstm(input_shape):
    model = Sequential()
    model.add(Masking(input_shape=input_shape, mask_value=-1.))
    # model.add(GRU(128, return_sequences=True))
    model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def test_gru(self):
    _runner(recurrent.GRU)
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(3, 5), output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
    model.compile(loss='hinge', optimizer='adam')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.history['val_loss'][-1] < 0.8)
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10), output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.compile(loss='hinge', optimizer='rmsprop')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.validation_loss[-1] < 0.75)
def LSTMModel(self, nHidden=150, lr=0.01):
    # print('nHidden: %i\tlr: %.3f' % (nHidden, lr))
    self.rnnModel.add(GRU(nHidden, activation='sigmoid',
                          input_shape=(None, self.maxFeatures), return_sequences=True))
    # self.rnnModel.add(LSTM(nHidden, activation='sigmoid', input_shape=(None, nHidden), return_sequences=True))
    self.rnnModel.add(TimeDistributedDense(nHidden))
    self.rnnModel.add(Activation('relu'))
    self.rnnModel.add(TimeDistributedDense(self.maxFeatures))
    self.rnnModel.add(Activation('softmax'))
    rmsprop = RMSprop(lr=lr, rho=0.9, epsilon=1e-06)
    self.rnnModel.compile(loss='categorical_crossentropy', optimizer=rmsprop)
def BuildModel():
    # global dataset_storage, model_storage, history_storage
    dataset = makeData(Variables=o.Variables)
    # dataset_storage = dataset
    model = None
    history = None
    modelname = ""
    print o.Model

    if "LSTM" in o.Model or "GRU" in o.Model:
        model, history = buildModel_1hidden(dataset, True)
    if o.Model == "RNNSV1":
        model, history = buildModel_RNNSV1(dataset, True)
    if o.Model == "DenseIP3D":
        model, history = buildModel_SimpleDense(dataset, False)
    print ' ------------------------------------------'
    print o.Model
    if o.Model == "RNNPlusMV2" or o.Model == "RNNPlusSV1":
        model, history = buildModel_RNNPlus(dataset, useAdam=True)

    modelname = (o.Version + "_" + o.Model + "_" + o.Variables + "_" + o.nEpoch + "epoch_" +
                 str(n_events/1000) + 'kEvts_' + str(o.nTrackCut) + 'nTrackCut_' +
                 o.nMaxTrack + "nMaxTrack_" + o.nLSTMClass + "nLSTMClass_" +
                 o.nLSTMNodes + "nLSTMNodes_" + o.nLayers + "nLayers")
    model = evalModel(dataset, model, o.Model)

    if o.TrackOrder == 'pT':
        modelname += "_SortpT"
    if o.TrackOrder == 'Reverse':
        modelname += "_ReverseOrder"
    if o.TrackOrder == 'SL0':
        modelname += "_SL0"
    if o.doTrainC == 'y':
        modelname += "_CMix"
    if o.AddJetpT == 'y':
        modelname += '_AddJetpT'
    if int(o.EmbedSize) != 2:
        modelname += "_" + o.EmbedSize + "EmbedSize"
    if o.Mode == "R":
        modelname = o.filebase + "_Retrain_" + o.nEpoch
    if o.doLessC == "y":
        modelname += "_LessC"
    if o.doJetpTReweight == "y":
        modelname += "_JetpTReweight"
    # modelname = "test"
    saveModel(modelname, model, history)
def train_breaker(datafilename, sentence_num=1000, puncs=u',?.?!???',
                  RNN=recurrent.GRU, HIDDEN_SIZE=128, EPOCH_SIZE=10, validate=True):
    wordtable = WordTable()
    wordtable.parse(datafilename, sentence_num)

    X, Y = [], []
    for line in open(datafilename).readlines()[:sentence_num]:
        line = line.strip().decode('utf-8')
        line = re.sub(ur'(^[{0}]+)|([{0}]+$)'.format(puncs), '', line)
        words = wordtable.encode(re.sub(ur'[{0}]'.format(puncs), '', line))
        breaks = re.sub(ur'0[{0}]+'.format(puncs), '1', re.sub(ur'[^{0}]'.format(puncs), '0', line))
        if len(words) >= 30 and len(words) <= 50 and breaks.count('1') >= 4:
            x = np.zeros((len(words), wordtable.capacity), dtype=np.bool)
            y = np.zeros((len(breaks), 2), dtype=np.bool)
            for idx in xrange(len(words)):
                x[idx][words[idx]] = True
                y[idx][int(breaks[idx])] = True
            X.append(x)
            Y.append(y)
    print 'total sentence: ', len(X)

    if validate:
        # Set apart 10% for validation
        split_at = len(X) - len(X)/10
        X_train, X_val = X[:split_at], X[split_at:]
        y_train, y_val = Y[:split_at], Y[split_at:]
    else:
        X_train, y_train = X, Y

    model = Graph()
    model.add_input(name='input', input_shape=(None, wordtable.capacity))
    model.add_node(RNN(HIDDEN_SIZE, return_sequences=True), name='forward', input='input')
    model.add_node(TimeDistributedDense(2, activation='softmax'), name='softmax', input='forward')
    model.add_output(name='output', input='softmax')
    model.compile('adam', {'output': 'categorical_crossentropy'})

    for epoch in xrange(EPOCH_SIZE):
        print "epoch: ", epoch
        for idx, (seq, label) in enumerate(zip(X_train, y_train)):
            loss, accuracy = model.train_on_batch({'input': np.array([seq]), 'output': np.array([label])}, accuracy=True)
            if idx % 20 == 0:
                print "\tidx={0}, loss={1}, accuracy={2}".format(idx, loss, accuracy)
        if validate:
            _Y, _P = [], []
            for (seq, label) in zip(X_val, y_val):
                y = label.argmax(axis=-1)
                p = model.predict({'input': np.array([seq])})['output'][0].argmax(axis=-1)
                _Y.extend(list(y))
                _P.extend(list(p))
            _Y, _P = np.array(_Y), np.array(_P)
            print "should break right: ", ((_P == 1)*(_Y == 1)).sum()
            print "should break wrong: ", ((_P == 0)*(_Y == 1)).sum()
            print "should not break right: ", ((_P == 0)*(_Y == 0)).sum()
            print "should not break wrong: ", ((_P == 1)*(_Y == 0)).sum()

    with open('wordtable_json.txt', 'w') as wordtable_file:
        wordtable_file.write(wordtable.to_json())
    with open('model_json.txt', 'w') as model_file:
        model_file.write(model.to_json())
    model.save_weights('model_weights.h5', overwrite=True)
def get_state_transfer_rnn(RNN):
    '''Converts a given Recurrent subclass (e.g., LSTM, GRU) to its state-transferable version.
    A state transfer RNN can transfer its hidden state to another one of the same type and compatible dimensions.
    '''
    class StateTransferRNN(RNN):

        def __init__(self, state_input=True, **kwargs):
            self.state_outputs = []
            self.state_input = state_input
            super(StateTransferRNN, self).__init__(**kwargs)

        def reset_states(self):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            if self.stateful:
                super(StateTransferRNN, self).reset_states()
            self.stateful = stateful

        def build(self, input_shape):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            super(StateTransferRNN, self).build(input_shape)
            self.stateful = stateful

        def broadcast_state(self, rnns):
            rnns = (set if type(rnns) in [list, tuple] else lambda a: {a})(rnns)
            rnns -= set(self.state_outputs)
            self.state_outputs.extend(rnns)
            for rnn in rnns:
                rnn.state_input = self
                rnn.updates = getattr(rnn, 'updates', [])
                rnn.updates.extend(zip(rnn.states, self.states_to_transfer))

        def call(self, x, mask=None):
            last_output, outputs, states = K.rnn(
                self.step,
                self.preprocess_input(x),
                self.states or self.get_initial_states(x),
                go_backwards=self.go_backwards,
                mask=mask,
                constants=self.get_constants(x),
                unroll=self.unroll,
                input_length=self.input_spec[0].shape[1])
            self.updates = zip(self.states, states)
            self.states_to_transfer = states
            return outputs if self.return_sequences else last_output

    return StateTransferRNN
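A rough usage sketch under stated assumptions (the encoder/decoder names, layer sizes, and the Keras 1.x-style import and output_dim keyword are illustrative, not from the original source): get_state_transfer_rnn wraps an existing recurrent class, and broadcast_state is called after both layers have been applied in a model graph, so that the registered updates copy the donor's final states into the receiver.

from keras.layers.recurrent import GRU

StateTransferGRU = get_state_transfer_rnn(GRU)
encoder = StateTransferGRU(output_dim=128, state_input=False)      # donates its final hidden state
decoder = StateTransferGRU(output_dim=128, return_sequences=True)  # receives the encoder's state
# ... apply encoder and decoder to their input tensors to build the graph;
# calling encoder on its input populates encoder.states_to_transfer ...
# encoder.broadcast_state(decoder) then registers updates that write the
# encoder's final states into decoder.states at run time.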
def __init__(self, embedding_mat=None, maxlen_doc=7, maxlen_sent=50, filter_length=[3, 4, 5, 6],
             nb_filters=200, n_vocab=10000, embedding_dims=300, hidden_gru=64, n_classes=5):
    # store the pretrained embedding matrix; it is referenced below when building the Embedding layer
    self.embedding_mat = embedding_mat
    if embedding_mat is not None:
        self.n_vocab, self.embedding_dims = embedding_mat.shape
    else:
        self.n_vocab = n_vocab
        self.embedding_dims = embedding_dims
    self.maxlen_doc = maxlen_doc
    self.maxlen_sent = maxlen_sent
    self.filter_length = filter_length
    self.nb_filters = nb_filters
    self.hidden_gru = hidden_gru
    print "Building the model"
    # graph model
    model = Graph()
    model.add_input(name='input', input_shape=(self.maxlen_doc*self.maxlen_sent,), dtype='int')

    # Model embedding layer, for word index -> word embedding transformation
    model.add_node(Embedding(self.n_vocab, self.embedding_dims, weights=[self.embedding_mat],
                             input_length=self.maxlen_sent*self.maxlen_doc),
                   name='embedding', input='input')
    model.add_node(Reshape((self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                   name='reshape_5d', input='embedding')

    # define the different filters
    conv_layer = []
    for each_length in filter_length:
        model.add_node(TimeDistributedConvolution2D(self.nb_filters/len(filter_length),
                                                    each_length, self.embedding_dims, border_mode='valid',
                                                    input_shape=(self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                       name='conv_{}'.format(each_length), input='reshape_5d')
        model.add_node(Activation('relu'),
                       name='relu_conv_{}'.format(each_length), input='conv_{}'.format(each_length))
        model.add_node(TimeDistributedMaxPooling2D(pool_size=(int(self.maxlen_sent - each_length + 1), 1),
                                                   border_mode='valid'),
                       name='pool_conv_{}'.format(each_length), input='relu_conv_{}'.format(each_length))
        model.add_node(TimeDistributedFlatten(),
                       name='flatten_conv_{}'.format(each_length), input='pool_conv_{}'.format(each_length))
        conv_layer.append('flatten_conv_{}'.format(each_length))
    # model.add_node(Activation('relu'), name='relu', inputs=conv_layer)
    print conv_layer

    model.add_node(GRU(self.hidden_gru), name='gru_forward', inputs=conv_layer)
    model.add_node(GRU(self.hidden_gru, go_backwards=True), name='gru_backward', inputs=conv_layer)
    model.add_node(Dropout(0.5), name='gru_outputs', inputs=['gru_forward', 'gru_backward'])
    model.add_node(Dense(n_classes), name='full_con', input='gru_outputs')
    model.add_node(Activation('softmax'), name='prob', input='full_con')
    model.add_output(name='pred', input='prob')
    model.compile('rmsprop', loss={'pred': 'categorical_crossentropy'})
def _generate_model(self, lembedding, num_classes=2, rnn_dim=32):
    WORD_PER_SENTENCES = lembedding.size_level1
    SENTENCES_PER_DOCUMENT = lembedding.size_level2
    EMBEDDING_DIM = lembedding.vector_box.vector_dim

    INPUT_SHAPE = (WORD_PER_SENTENCES * SENTENCES_PER_DOCUMENT, )
    EMBEDDING_SHAPE = (SENTENCES_PER_DOCUMENT, WORD_PER_SENTENCES, EMBEDDING_DIM)

    doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

    embedded = Sequential([
        Embedding(
            input_dim=lembedding.vector_box.size,
            output_dim=EMBEDDING_DIM,
            input_length=INPUT_SHAPE[0]
        ),
        Reshape(EMBEDDING_SHAPE)
    ])(doc)

    out = TimeDistributed(GRU(rnn_dim))(embedded)
    next = Dropout(0.5)(out)
    out = GRU(rnn_dim)(next)
    out = Dropout(0.5)(out)

    mapping = [
        Dense(64, activation='relu'),  # Maybe add more layers
    ]
    for f in mapping:
        out = f(out)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def lstm_memory_train(X_train_list, y_train, vocab_size):
    N = len(X_train_list)
    X_train_list = [sequence.pad_sequences(x_train, maxlen=MAX_LEN) for x_train in X_train_list]

    input_list = []
    out_list = []
    for i in range(N):
        input, out = get_embedding_input_output('f%d' % i, vocab_size)
        input_list.append(input)
        out_list.append(out)
    x = merge(out_list, mode='concat')

    lstm_out = LSTM(HIDDEN_SIZE, return_sequences=True)(x)
    lstm_share = GRU(HIDDEN_SIZE, return_sequences=True)

    x = lstm_out
    for i in range(2):
        att = TimeDistributed(Dense(1))(x)
        att = Flatten()(att)
        att = Activation(activation="softmax")(att)
        att = RepeatVector(HIDDEN_SIZE)(att)
        att = Permute((2, 1))(att)

        mer = merge([att, lstm_out], "mul")
        mer = merge([mer, out_list[-1]], 'mul')

        z = merge([lstm_out, mer], 'sum')
        z = lstm_share(z)
        x = z

    hid = AveragePooling1D(pool_length=2)(x)
    hid = Flatten()(hid)
    # hid = merge([hid, out_list[-1]], mode='concat')
    main_loss = Dense(1, activation='sigmoid', name='main_output')(hid)

    model = Model(input=input_list, output=main_loss)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.fit(X_train_list, y_train, batch_size=BATCH_SIZE, nb_epoch=EPOCHS)
    return model
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size),
                         name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size),
                         name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_1_a'))(text_mask_1)
    lstm_1_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_b'))(lstm_1_a)
    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_2_a'))(text_mask_2)
    lstm_2_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_b'))(lstm_2_a)
    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True,
                            name='cosine_similarity')([lstm_1_b, lstm_2_b])

    model = Model(inputs=[text_input_1, text_input_2],
                  outputs=cosine_similarity)
    return model
def create_model(self, n_timesteps=None, batch_size=1, include_pred_layer=True):
    input_layers = []

    seq_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="seq_input_layer")
    input_layers.append(seq_input_layer)

    seq_embedding_layer = Embedding(input_dim=self.lexicon_size + 1,
                                    output_dim=self.n_embedding_nodes, mask_zero=True,
                                    name='seq_embedding_layer')(seq_input_layer)

    for layer_num in range(self.n_hidden_layers):
        if layer_num == 0:
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True,
                                   stateful=True, name='seq_hidden_layer1')(seq_embedding_layer)
        else:  # add extra hidden layers
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True,
                                   stateful=True, name='seq_hidden_layer' + str(layer_num + 1))(seq_hidden_layer)

    if self.use_pos:
        pos_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="pos_input_layer")
        input_layers.append(pos_input_layer)

        pos_embedding_layer = Embedding(input_dim=self.n_pos_tags + 1,
                                        output_dim=self.n_pos_embedding_nodes, mask_zero=True,
                                        name='pos_embedding_layer')(pos_input_layer)
        pos_hidden_layer = GRU(output_dim=self.n_pos_nodes, return_sequences=True,
                               stateful=True, name='pos_hidden_layer')(pos_embedding_layer)
        seq_hidden_layer = merge([seq_hidden_layer, pos_hidden_layer], mode='concat',
                                 concat_axis=-1, name='pos_merge_hidden_layer')

    if self.use_features:
        feature_input_layer = Input(batch_shape=(batch_size, self.lexicon_size + 1), name="feature_input_layer")
        input_layers.append(feature_input_layer)
        feature_hidden_layer = Dense(output_dim=self.n_feature_nodes, activation='sigmoid',
                                     name='feature_hidden_layer')(feature_input_layer)
        feature_hidden_layer = RepeatVector(n_timesteps)(feature_hidden_layer)
        seq_hidden_layer = merge([seq_hidden_layer, feature_hidden_layer], mode='concat',
                                 concat_axis=-1, name='feature_merge_hidden_layer')

    output_layers = []
    if include_pred_layer:
        pred_layer = TimeDistributed(Dense(self.lexicon_size + 1, activation="softmax",
                                           name='pred_layer'))(seq_hidden_layer)
        output_layers.append(pred_layer)
        if self.use_pos:
            pred_pos_layer = TimeDistributed(Dense(self.n_pos_tags + 1, activation="softmax",
                                                   name='pred_pos_layer'))(seq_hidden_layer)
            output_layers.append(pred_pos_layer)

    model = Model(input=input_layers, output=output_layers)

    # select optimizer and compile
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=eval(self.optimizer)(clipvalue=self.clipvalue, lr=self.lr, decay=self.decay))
    return model