def LSTMCNN(opt):
    """Build and compile a character-aware neural language model.

    Combines a character-level CNN and/or word embeddings, feeds the
    (optionally batch-normalized) features through highway layers and a
    stack of stateful LSTMs, and predicts the next word with a softmax
    over the word vocabulary (Kim et al., "Character-Aware Neural
    Language Models").

    Relevant fields of ``opt``:
      opt.seq_length = number of time steps (words) in each batch
      opt.rnn_size = dimensionality of hidden layers
      opt.num_layers = number of LSTM layers
      opt.dropout = dropout probability (applied after each LSTM if > 0)
      opt.word_vocab_size = num words in the vocab
      opt.word_vec_size = dimensionality of word embeddings
      opt.char_vocab_size = num chars in the character vocab
      opt.char_vec_size = dimensionality of char embeddings
      opt.feature_maps = table of feature map sizes for each kernel width
      opt.kernels = table of kernel widths
      opt.max_word_l = max length of a word, in characters
      opt.use_words = 1 if use word embeddings, otherwise not
      opt.use_chars = 1 if use char embeddings, otherwise not
      opt.highway_layers = number of highway layers to use, if any
      opt.batch_norm = 1 to apply batch normalization to the features
      opt.batch_size = number of sequences in each batch
      opt.learning_rate / opt.max_grad_norm = SGD hyperparameters

    Returns a compiled Keras model (sparse categorical cross-entropy,
    scaled-gradient SGD).

    Raises:
        ValueError: if neither ``opt.use_words`` nor ``opt.use_chars``
            is set — the model would have no input.
    """
    # Without at least one input source the graph below never binds
    # `x`/`inputs`; fail fast with a clear message instead of an
    # UnboundLocalError further down.
    if not (opt.use_words or opt.use_chars):
        raise ValueError('at least one of opt.use_words or opt.use_chars must be set')

    if opt.use_words:
        # Fixed batch shape is required because the LSTMs below are stateful.
        word = Input(batch_shape=(opt.batch_size, opt.seq_length), dtype='int32', name='word')
        word_vecs = Embedding(opt.word_vocab_size, opt.word_vec_size, input_length=opt.seq_length)(word)

    if opt.use_chars:
        chars = Input(batch_shape=(opt.batch_size, opt.seq_length, opt.max_word_l), dtype='int32', name='chars')
        # TimeDistributed applies the char embedding independently per time step.
        chars_embedding = TimeDistributed(Embedding(opt.char_vocab_size, opt.char_vec_size, name='chars_embedding'))(chars)
        # Project helper: multi-width char convolutions + max-over-time pooling.
        cnn = CNN(opt.seq_length, opt.max_word_l, opt.char_vec_size, opt.feature_maps, opt.kernels, chars_embedding)
        if opt.use_words:
            x = Concatenate()([cnn, word_vecs])
            inputs = [chars, word]
        else:
            x = cnn
            inputs = chars
    else:
        x = word_vecs
        inputs = word

    if opt.batch_norm:
        x = BatchNormalization()(x)

    for _ in range(opt.highway_layers):
        x = TimeDistributed(Highway(activation='relu'))(x)

    for _ in range(opt.num_layers):
        # stateful=True carries hidden state across batches (truncated BPTT
        # over a long corpus); hence the fixed batch_shape on the inputs.
        x = LSTM(opt.rnn_size, activation='tanh', recurrent_activation='sigmoid', return_sequences=True, stateful=True)(x)
        if opt.dropout > 0:
            x = Dropout(opt.dropout)(x)

    # Per-time-step softmax over the word vocabulary.
    output = TimeDistributed(Dense(opt.word_vocab_size, activation='softmax'))(x)

    model = sModel(inputs=inputs, outputs=output)
    model.summary()
    # sSGD: project subclass of SGD; `scale` presumably rescales gradients by
    # the sequence length — TODO confirm against its definition.
    optimizer = sSGD(lr=opt.learning_rate, clipnorm=opt.max_grad_norm, scale=float(opt.seq_length))
    # Sparse targets: labels are int word ids, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)
    return model
# (removed web-scrape residue: "评论列表" / "文章目录" — blog comment-list and
# table-of-contents labels that were not part of the code and broke parsing)