from keras import backend as K
from keras.models import Model
from keras.layers import (Input, Embedding, Bidirectional, GRU, Dense,
                          TimeDistributed, Softmax, Lambda, Permute,
                          multiply, Dropout, BatchNormalization)


def basic_attention(nb_words=10000, EMBEDDING_DIM=300,
                    MAX_SEQUENCE_LENGTH=40,
                    num_rnn=300, num_dense=300, rate_drop_rnn=0.25,
                    rate_drop_dense=0.25, act='relu'):
    # Shared layers: both sentences go through the same embedding,
    # BiGRU encoder and attention weights (siamese setup).
    embedding_layer = Embedding(nb_words, EMBEDDING_DIM,
                                input_length=MAX_SEQUENCE_LENGTH)
    rnn_layer = Bidirectional(GRU(num_rnn, dropout=rate_drop_rnn,
                                  recurrent_dropout=rate_drop_rnn,
                                  return_sequences=True))

    # Additive self-attention: a tanh projection, a scalar score per time
    # step, and a softmax over the time axis (axis=1).  A plain
    # Activation('softmax') would normalise over the size-1 last axis and
    # return all ones, so Softmax(axis=1) is used here instead.
    attention_W = TimeDistributed(Dense(350, activation='tanh'))
    attention_w = TimeDistributed(Dense(1))
    attention_softmax = Softmax(axis=1)
    attention_sum = Lambda(lambda x: K.sum(x, axis=1))

    # First sentence: encode and pool with attention.
    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)
    x1 = rnn_layer(embedded_sequences_1)        # (batch, seq, 2*num_rnn)
    attention1 = attention_W(x1)
    attention1 = attention_w(attention1)        # (batch, seq, 1)
    attention1 = attention_softmax(attention1)  # weights sum to 1 over time
    attention1 = Permute([2, 1])(attention1)    # (batch, 1, seq)
    x1 = Permute([2, 1])(x1)                    # (batch, 2*num_rnn, seq)
    x1 = multiply([attention1, x1])             # broadcast weights over features
    x1 = Permute([2, 1])(x1)                    # back to (batch, seq, 2*num_rnn)
    x1 = attention_sum(x1)                      # weighted sum -> (batch, 2*num_rnn)

    # Second sentence: same shared layers, same attention pooling.
    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)
    x2 = rnn_layer(embedded_sequences_2)
    attention2 = attention_W(x2)
    attention2 = attention_w(attention2)
    attention2 = attention_softmax(attention2)
    attention2 = Permute([2, 1])(attention2)
    x2 = Permute([2, 1])(x2)
    x2 = multiply([attention2, x2])
    x2 = Permute([2, 1])(x2)
    x2 = attention_sum(x2)

    # Merge the two sentence vectors element-wise and classify.
    merged = multiply([x1, x2])
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)
    preds = Dense(1, activation='sigmoid')(merged)

    ########################################
    ## build and compile the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    # print(STAMP)
    return model
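
# --- Usage sketch (not from the original post) ---------------------------
# A minimal example of building and fitting the two-input model.  The
# arrays q1_data, q2_data and labels are hypothetical toy data, used only
# to illustrate the expected input shapes: two integer-encoded sequences
# of length MAX_SEQUENCE_LENGTH and a binary label per pair.
import numpy as np

num_samples, max_len, vocab = 256, 40, 10000
q1_data = np.random.randint(1, vocab, size=(num_samples, max_len))  # sentence 1, token ids
q2_data = np.random.randint(1, vocab, size=(num_samples, max_len))  # sentence 2, token ids
labels = np.random.randint(0, 2, size=(num_samples,))               # 1 = matching pair, 0 = not

model = basic_attention(nb_words=vocab, MAX_SEQUENCE_LENGTH=max_len)
model.fit([q1_data, q2_data], labels,
          validation_split=0.1, epochs=1, batch_size=64)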