def create_two_level_bi_lstm(input_4d, embedding_layer,
                             max_sentence_length, max_n_analyses, max_word_root_length,
                             lstm_dim, embedding_dim):
    # Flatten (sentences, analyses, root characters) into one axis so the
    # embedding layer can be applied to every character index at once.
    r = Reshape((max_sentence_length * max_n_analyses * max_word_root_length,))
    # input_4d = Lambda(lambda x: x, output_shape=lambda s: s)(input_4d)
    rr = r(input_4d)
    input_embeddings = embedding_layer(rr)
    print(input_embeddings)
    # Restore a (sentence*analysis, root characters, embedding) layout while
    # keeping the mask intact (MaskedReshape is the project's custom layer).
    r = MaskedReshape(
        (max_sentence_length * max_n_analyses, max_word_root_length, embedding_dim),
        (max_sentence_length * max_n_analyses, max_word_root_length))
    # input_embeddings = Lambda(lambda x: x, output_shape=lambda s: s)(input_embeddings)
    rr = r(input_embeddings)
    lstm_layer = Bidirectional(LSTM(lstm_dim,
                                    input_shape=(max_word_root_length, embedding_dim)))
    td_lstm_layer = TimeDistributed(lstm_layer,
                                    input_shape=(max_word_root_length, embedding_dim))
    lstm_layer_output = td_lstm_layer(rr)
    lstm_layer_output_relu = Activation('relu')(lstm_layer_output)
    print("lstm_layer_output_relu", lstm_layer_output_relu)
    r = Reshape((max_sentence_length, max_n_analyses, 2 * lstm_dim))
    lstm_layer_output_relu = Lambda(lambda x: x, output_shape=lambda s: s)(lstm_layer_output_relu)
    lstm_layer_output_relu_reshaped = r(lstm_layer_output_relu)
    print("lstm_layer_output_relu_reshaped", lstm_layer_output_relu_reshaped)
    return input_embeddings, lstm_layer_output_relu_reshaped
Python Bidirectional() usage examples
Source: maskedreshape.py, project neural-turkish-morphological-disambiguator, author onurgu
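As a rough illustration of the pattern in create_two_level_bi_lstm above (reshape a 4-D character-level input so a Bidirectional LSTM can encode each analysis via TimeDistributed), here is a minimal self-contained sketch using only stock Keras layers. MaskedReshape is that project's custom masking-aware layer, so this sketch substitutes a plain Reshape and ignores masking; every dimension below is made up.

from keras.layers import (Input, Embedding, Reshape, TimeDistributed,
                          Bidirectional, LSTM)
from keras.models import Model

# Hypothetical dimensions, not taken from the original project.
max_sent, max_analyses, max_root, embed_dim, lstm_dim = 10, 4, 12, 32, 64

chars = Input(shape=(max_sent, max_analyses, max_root), dtype='int32')
flat = Reshape((max_sent * max_analyses * max_root,))(chars)
emb = Embedding(input_dim=100, output_dim=embed_dim)(flat)            # (B, S*A*R, E)
emb = Reshape((max_sent * max_analyses, max_root, embed_dim))(emb)    # (B, S*A, R, E)
root_vec = TimeDistributed(Bidirectional(LSTM(lstm_dim)))(emb)        # (B, S*A, 2*lstm_dim)
root_vec = Reshape((max_sent, max_analyses, 2 * lstm_dim))(root_vec)  # (B, S, A, 2*lstm_dim)
Model(chars, root_vec).summary()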
def build_models(params, index_embedding):
    in_layer = Input(shape=(params['max_len'],), dtype='int32')
    mid_layer = Embedding(input_dim=params['num_words'],
                          output_dim=params['embedding_len'],
                          weights=[index_embedding])(in_layer)
    # mid_layer = LSTM(params['lstm_output_dim'], return_sequences=True,
    #                  dropout=0.5, recurrent_dropout=0.5)(mid_layer)
    mid_layer = Bidirectional(LSTM(params['lstm_output_dim'], return_sequences=True,
                                   dropout=0.5, recurrent_dropout=0.5))(mid_layer)
    mid_layer = Dense(params['dense_units'])(mid_layer)
    mid_layer = Dropout(0.3)(mid_layer)
    mid_layer = Flatten()(mid_layer)
    if params['num_class'] == 2:
        loss = 'binary_crossentropy'
        out_layer = Dense(units=1, activation='sigmoid')(mid_layer)
    else:
        loss = 'categorical_crossentropy'
        out_layer = Dense(units=params['num_class'], activation='softmax')(mid_layer)
    single_model = Model(inputs=in_layer, outputs=out_layer)
    single_model.compile(loss=loss, optimizer='rmsprop', metrics=['accuracy'])
    return single_model
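build_models expects a params dict and a pre-built embedding matrix. A hedged usage sketch with made-up sizes (none of these values come from the original project):

import numpy as np

# Hypothetical configuration; every value here is illustrative only.
params = {
    'max_len': 100,          # padded sequence length
    'num_words': 20000,      # vocabulary size
    'embedding_len': 300,    # embedding dimensionality
    'lstm_output_dim': 128,  # per-direction LSTM units
    'dense_units': 64,
    'num_class': 2,
}
index_embedding = np.random.normal(size=(params['num_words'], params['embedding_len']))

model = build_models(params, index_embedding)
model.summary()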
def lstm_units(self):
    lstm = self.model.get_layer("lstm")
    if isinstance(lstm, Bidirectional):
        lstm = lstm.layer
    return lstm.units
def bidirectional(self):
    return isinstance(self.model.get_layer("lstm"), Bidirectional)
def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C,
                                  trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q,
                                  trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A,
                                  trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)
    l_lstm_c = Bidirectional(LSTM(60))(embedded_context)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)
    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='relu')(concat_c_q)
    relu_c_q = Dropout(0.25)(relu_c_q)
    concat_c_q_a = concatenate([l_lstm_a, relu_c_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)
    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=opt,
                       metrics=['acc'])
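This constructor (and the variants that follow) relies on a word_index mapping, a matching embedding_matrix, and module-level length constants. A hedged setup sketch; the class name QAModel and every value below are placeholders, not from the original source:

import numpy as np

# Placeholder globals the constructors rely on (illustrative values only).
EMBEDDING_DIM = 300
MAX_SEQUENCE_LENGTH_C = 300   # context length
MAX_SEQUENCE_LENGTH_Q = 30    # question length
MAX_SEQUENCE_LENGTH_A = 30    # answer length

word_index = {'the': 1, 'cat': 2, 'sat': 3}                # normally built by a Tokenizer
embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))

qa = QAModel(word_index, embedding_matrix)                 # QAModel is a hypothetical wrapper class
qa.model.summary()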
def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C,
                                  trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q,
                                  trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A,
                                  trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)
    l_lstm_c = Bidirectional(LSTM(60, return_sequences=True))(embedded_context)
    l_lstm_c = Bidirectional(LSTM(60))(l_lstm_c)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)
    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='relu')(concat_c_q)
    relu_c_q = Dropout(0.25)(relu_c_q)
    concat_c_q_a = concatenate([l_lstm_a, relu_c_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)
    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=opt,
                       metrics=['acc'])
def __init__(self, word_index, embedding_matrix):
    embedding_layer_q = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q,
                                  trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A,
                                  trainable=False)
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)
    concat_c_q_a = concatenate([l_lstm_a, l_lstm_q], axis=1)
    softmax_c_q_a = Dense(2, activation='softmax')(concat_c_q_a)
    self.model = Model([question, answer], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=opt,
                       metrics=['acc'])
def __init__(self, word_index, embedding_matrix):
    embedding_layer_c = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_C,
                                  trainable=False)
    embedding_layer_q = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_Q,
                                  trainable=False)
    embedding_layer_a = Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  input_length=MAX_SEQUENCE_LENGTH_A,
                                  trainable=False)
    context = Input(shape=(MAX_SEQUENCE_LENGTH_C,), dtype='int32', name='context')
    question = Input(shape=(MAX_SEQUENCE_LENGTH_Q,), dtype='int32', name='question')
    answer = Input(shape=(MAX_SEQUENCE_LENGTH_A,), dtype='int32', name='answer')
    embedded_context = embedding_layer_c(context)
    embedded_question = embedding_layer_q(question)
    embedded_answer = embedding_layer_a(answer)
    l_lstm_c = Bidirectional(LSTM(60))(embedded_context)
    l_lstm_q = Bidirectional(LSTM(60))(embedded_question)
    l_lstm_a = Bidirectional(LSTM(60))(embedded_answer)
    concat_c_q = concatenate([l_lstm_q, l_lstm_c], axis=1)
    relu_c_q = Dense(100, activation='tanh')(concat_c_q)
    concat_c_a = concatenate([l_lstm_a, l_lstm_c], axis=1)
    relu_c_a = Dense(100, activation='tanh')(concat_c_a)
    relu_c_q = Dropout(0.5)(relu_c_q)
    relu_c_a = Dropout(0.5)(relu_c_a)
    # Cosine similarity between the two projections; dot(..., normalize=True) is the
    # Keras 2 replacement for the old Keras 1 merge([...], mode='cos') used originally.
    cos_c_q_a = dot([relu_c_a, relu_c_q], axes=1, normalize=True)
    softmax_c_q_a = Dense(2, activation='softmax')(cos_c_q_a)
    self.model = Model([question, answer, context], softmax_c_q_a)
    opt = Nadam()
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=opt,
                       metrics=['acc'])
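Because the Keras 1 merge(..., mode='cos') call above was rewritten with the Keras 2 dot layer, here is a small self-contained check, independent of the model, that dot(..., axes=1, normalize=True) really computes the cosine similarity of its two inputs:

import numpy as np
from keras.layers import Input, dot
from keras.models import Model

a_in, b_in = Input(shape=(4,)), Input(shape=(4,))
cos = dot([a_in, b_in], axes=1, normalize=True)   # L2-normalize, then dot product
m = Model([a_in, b_in], cos)

a = np.array([[1.0, 2.0, 3.0, 4.0]])
b = np.array([[4.0, 3.0, 2.0, 1.0]])
keras_cos = m.predict([a, b])[0, 0]
numpy_cos = np.dot(a[0], b[0]) / (np.linalg.norm(a) * np.linalg.norm(b))
print(keras_cos, numpy_cos)   # the two values should agree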
def build_model(data, word_weights, tag_window=5, embed_dim=100):
    batch_size = 32
    nb_epoch = 16
    nb_class = 4
    hidden_dim = 128
    train_x = np.array(list(data['x']))
    train_y = np.array(list(data['y']))
    train_y = np_utils.to_categorical(train_y, nb_class)
    print(train_x.shape)
    print(train_y.shape)
    input_x = Input(shape=(tag_window,), dtype='float32', name='input_x')
    embed_x = Embedding(output_dim=embed_dim,
                        input_dim=word_weights.shape[0],
                        input_length=tag_window,
                        weights=[word_weights],
                        name='embed_x')(input_x)
    bi_lstm = Bidirectional(LSTM(hidden_dim, return_sequences=False), merge_mode='sum')(embed_x)
    x_dropout = Dropout(0.5)(bi_lstm)
    x_output = Dense(nb_class,
                     # kernel_regularizer=regularizers.l2(0.01),
                     # kernel_constraint=maxnorm(3.0),
                     # activity_regularizer=regularizers.l2(0.01),
                     activation='softmax')(x_dropout)
    model = Model(inputs=[input_x], outputs=[x_output])
    model.compile(optimizer='adamax', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit([train_x], [train_y], validation_split=0.2,
              batch_size=batch_size, epochs=nb_epoch, shuffle=True)
def LSTMLayer(embed_matrix, embed_input, sequence_length, dropout_prob, hidden_dims, embedding_dim=300, lstm_dim=100):
    model = Sequential()
    model.add(Embedding(embed_input, embedding_dim, input_length=sequence_length, weights=[embed_matrix]))
    model.add(Bidirectional(MGU(lstm_dim, return_sequences=True)))
    # model.add(AttentionLayer(lstm_dim))
    model.add(GlobalMaxPooling1D())
    # 3. Hidden Layer
    model.add(Dense(hidden_dims))
    model.add(Dropout(dropout_prob[1]))
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
    return model
def HierarchicalRNN(embed_matrix, max_words, ans_cnt, sequence_length, embedding_dim, lstm_dim=100):
    '''Hierarchical RNN model.
    Input: (batch_size, answers, answer words)
    Args:
        embed_matrix: word embedding matrix
        max_words: word dict size of the embedding layer
        ans_cnt: answer count
        sequence_length: words per answer
        embedding_dim: embedding dimension
        lstm_dim: recurrent units per direction
    '''
    hnn = Sequential()
    x = Input(shape=(ans_cnt, sequence_length))
    # 1. time-distributed word embedding: (None, steps, words, embed_dim)
    words_embed = TimeDistributed(Embedding(max_words, embedding_dim, input_length=sequence_length,
                                            weights=[embed_matrix]))(x)
    # 2. word-level lstm embedding: --> (None, steps/sentence_num, sent_words, hidden_dim)
    word_lstm = TimeDistributed(Bidirectional(MGU(lstm_dim, return_sequences=True)))(words_embed)
    # 3. max pooling: --> (None, steps, dim)
    word_avg = TimeDistributed(GlobalMaxPooling1D())(word_lstm)
    # word_avg = TimeDistributed(AttentionLayer(lstm_dim*2))(word_lstm)
    # 4. sentence lstm: --> (None, steps, hidden_dim)
    sent_lstm = Bidirectional(MGU(lstm_dim, return_sequences=True))(word_avg)
    # 5. pooling: --> (None, hidden_dim)
    sent_avg = GlobalMaxPooling1D()(sent_lstm)
    # sent_avg = AttentionLayer(lstm_dim*2)(sent_lstm)
    model = Model(inputs=x, outputs=sent_avg)
    hnn.add(model)
    return hnn
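MGU is a custom minimal-gated-unit recurrent layer from that project. As a rough, self-contained sketch of the same hierarchical pattern with a stock LSTM standing in for it (all sizes below are hypothetical):

import numpy as np
from keras.layers import (Input, Embedding, TimeDistributed, Bidirectional,
                          LSTM, GlobalMaxPooling1D)
from keras.models import Model

ans_cnt, seq_len, vocab, emb_dim, rnn_dim = 5, 40, 10000, 100, 64
embed_matrix = np.random.normal(size=(vocab, emb_dim))   # placeholder embeddings

x = Input(shape=(ans_cnt, seq_len))
words = TimeDistributed(Embedding(vocab, emb_dim, weights=[embed_matrix]))(x)
word_rnn = TimeDistributed(Bidirectional(LSTM(rnn_dim, return_sequences=True)))(words)
word_pool = TimeDistributed(GlobalMaxPooling1D())(word_rnn)   # one vector per answer
sent_rnn = Bidirectional(LSTM(rnn_dim, return_sequences=True))(word_pool)
sent_pool = GlobalMaxPooling1D()(sent_rnn)                    # one vector per batch item
Model(x, sent_pool).summary()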
def get_attention(self, inputs):
    '''
    Takes inputs and returns pairs of synsets and corresponding attention values.
    '''
    if not self.attention_model:
        self.define_attention_model()
    attention_outputs = self.attention_model.predict(inputs)
    sent_attention_values = []
    for sentence_input, sentence_attention in zip(inputs, attention_outputs):
        word_attention_values = []
        for word_input, word_attention in zip(sentence_input, sentence_attention):
            # Size of word input is (senses, hyps+1).
            # Ignoring the last hyp index because that is just the word index put there by
            # OntoAwareEmbedding for sense priors.
            if word_input.sum() == 0:
                # This is just padding
                continue
            word_input = word_input[:, :-1]  # removing last hyp index
            sense_hyp_prod = self.num_senses * self.num_hyps
            assert len(word_attention) == sense_hyp_prod or len(word_attention) == 2 * sense_hyp_prod
            attention_per_sense = []
            if len(word_attention) == 2 * sense_hyp_prod:
                # The encoder is Bidirectional. We have attentions from both directions.
                forward_sense_attention = word_attention[:len(word_attention) // 2]
                backward_sense_attention = word_attention[len(word_attention) // 2:]
                processed_attention = list(zip(forward_sense_attention, backward_sense_attention))
            else:
                # Encoder is not bidirectional
                processed_attention = word_attention
            hyp_ind = 0
            while hyp_ind < len(processed_attention):
                attention_per_sense.append(processed_attention[hyp_ind:hyp_ind + self.num_hyps])
                hyp_ind += self.num_hyps
            sense_attention_values = []
            for sense_input, attention_per_hyp in zip(word_input, attention_per_sense):
                hyp_attention_values = []
                for hyp_input, hyp_attention in zip(sense_input, attention_per_hyp):
                    if hyp_input == 0:
                        continue
                    hyp_attention_values.append((self.data_processor.get_token_from_index(hyp_input,
                                                                                          onto_aware=True),
                                                 hyp_attention))
                sense_attention_values.append(hyp_attention_values)
            word_attention_values.append(sense_attention_values)
        sent_attention_values.append(word_attention_values)
    return sent_attention_values
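To make the index arithmetic above concrete: each word's flat attention vector has length num_senses * num_hyps (twice that for a bidirectional encoder) and is regrouped into one slice of num_hyps values per sense. A small standalone illustration with made-up numbers:

import numpy as np

num_senses, num_hyps = 3, 5
word_attention = np.arange(num_senses * num_hyps)   # stand-in for one word's attention

# Same regrouping as in get_attention: consecutive chunks of num_hyps values
# belong to successive senses of the word.
attention_per_sense = [word_attention[i:i + num_hyps]
                       for i in range(0, len(word_attention), num_hyps)]
for sense_idx, hyp_attention in enumerate(attention_per_sense):
    print(sense_idx, hyp_attention)   # 3 senses, 5 hypernym attention values each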
def fhan3_avg(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    # alpha_its, Si = AttentionLayer(name='att1')(hij)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    word_pool = GlobalAveragePooling1D()(wordDrop)
    wordEncoder = Model(wordInputs, word_pool)
    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    # sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    # alpha_s, Vb = AttentionLayer(name='att2')(hi)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    sent_pool = GlobalAveragePooling1D()(sentDrop)
    Vb = Reshape((1, sent_pool._keras_shape[1]))(sent_pool)
    # -----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=False, name='headlineEmb')(headlineInput)
    # Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    # a3, Vn = AttentionLayer(name='att3')(h3)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    head_pool = GlobalAveragePooling1D()(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(head_pool)
    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
def fhan3_max(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    # alpha_its, Si = AttentionLayer(name='att1')(hij)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    word_max = GlobalMaxPooling1D()(wordDrop)
    wordEncoder = Model(wordInputs, word_max)
    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    # sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    # alpha_s, Vb = AttentionLayer(name='att2')(hi)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    sent_max = GlobalMaxPooling1D()(sentDrop)
    Vb = Reshape((1, sent_max._keras_shape[1]))(sent_max)
    # -----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=False, name='headlineEmb')(headlineInput)
    # Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    # a3, Vn = AttentionLayer(name='att3')(h3)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    head_max = GlobalMaxPooling1D()(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(head_max)
    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
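A hedged construction sketch for fhan3_max (and, analogously, fhan3_avg). The hyperparameters below are illustrative only, and the function relies on the _keras_shape attribute, so it is assumed to run under standalone Keras 2.x rather than tf.keras. Note that EMBEDDING_DIM must equal 2 * WORDGRU so the headline embedding and the body vector Vb can be concatenated along the time axis.

import numpy as np

# Illustrative hyperparameters only; EMBEDDING_DIM == 2 * WORDGRU is required here.
MAX_NB_WORDS, MAX_WORDS, MAX_SENTS = 20000, 30, 10
EMBEDDING_DIM, WORDGRU, DROPOUTPER = 200, 100, 0.3
embedding_matrix = np.random.normal(size=(MAX_NB_WORDS, EMBEDDING_DIM))

model, word_encoder = fhan3_max(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS,
                                EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER)
model.summary()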
def fhan3_pretrain(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name='word1', dtype='float32')
    # Assuming all the sentences have same number of words. Check for input_length again.
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='emb1')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, name='gru1', return_sequences=True))(wordEmbedding)
    wordDrop = Dropout(DROPOUTPER, name='drop1')(hij)
    alpha_its, Si = AttentionLayer(name='att1')(wordDrop)
    wordEncoder = Model(wordInputs, Si)
    wordEncoder.load_weights('han1_pretrain.h5', by_name=True)
    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    alpha_s, Vb = AttentionLayer(name='att2')(sentDrop)
    Vb = Reshape((1, Vb._keras_shape[1]))(Vb)
    # -----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=True, name='headlineEmb')(headlineInput)
    Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    h3Drop = Dropout(DROPOUTPER, name='h3drop')(h3)
    a3, Vn = AttentionLayer(name='att3')(h3Drop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vn)
    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
def HAN(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
    wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
    wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix],
                              mask_zero=True, trainable=True, name='wordEmbedding')(wordInputs)
    hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
    wordDrop = Dropout(DROPOUTPER, name='wordDrop')(hij)
    alpha_its, Si = AttentionLayer(name='att1')(wordDrop)
    wordEncoder = Model(wordInputs, Si)
    # -----------------------------------------------------------------------------------------------
    docInputs = Input(shape=(None, MAX_WORDS), name='docInputs', dtype='float32')
    sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
    sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
    hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
    sentDrop = Dropout(DROPOUTPER, name='sentDrop')(hi)
    alpha_s, Vb = AttentionLayer(name='att2')(sentDrop)
    Vb = Reshape((1, Vb._keras_shape[1]))(Vb)
    # -----------------------------------------------------------------------------------------------
    headlineInput = Input(shape=(MAX_WORDS,), name='headlineInput', dtype='float32')
    headlineEmb = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, mask_zero=True, name='headlineEmb')(headlineInput)
    Vb = Masking(mask_value=0.0, name='Vb')(Vb)
    headlineBodyEmb = concatenate([headlineEmb, Vb], axis=1, name='headlineBodyEmb')
    h3 = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru3')(headlineBodyEmb)
    headDrop = Dropout(DROPOUTPER, name='3Drop')(h3)
    a3, Vn = AttentionLayer(name='att3')(headDrop)
    v6 = Dense(1, activation="sigmoid", kernel_initializer='he_normal', name="dense")(Vn)
    model = Model(inputs=[docInputs, headlineInput], outputs=[v6])
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model, wordEncoder
def biLSTM_encoder2(input, units, dropout=0.0, recurrent_dropout=0.0, num_layers=3,
                    input_dropout=0.3, output_dropout=0.3, concat_layers=True):
    """Question and context encoder: a stack of plain Keras Bi-LSTMs.
    Adds optional dropout between layers and optional concatenation of every
    layer's output into one representation."""
    outputs = [input]
    for i in range(num_layers):
        rnn_input = outputs[-1]
        if input_dropout > 0:
            rnn_input = Dropout(rate=input_dropout)(rnn_input)
        rnn_output = Bidirectional(LSTM(units=units,
                                        activation='tanh',
                                        recurrent_activation='hard_sigmoid',
                                        use_bias=True,
                                        kernel_initializer='glorot_uniform',
                                        recurrent_initializer='orthogonal',
                                        bias_initializer='zeros',
                                        unit_forget_bias=True,
                                        kernel_regularizer=None,
                                        recurrent_regularizer=None,
                                        bias_regularizer=None,
                                        activity_regularizer=None,
                                        kernel_constraint=None,
                                        recurrent_constraint=None,
                                        bias_constraint=None,
                                        return_sequences=True,
                                        dropout=dropout,
                                        recurrent_dropout=recurrent_dropout,
                                        unroll=False))(rnn_input)
        outputs.append(rnn_output)
    # Concatenate the hidden layers (skipping the raw input) or keep only the last one.
    if concat_layers:
        output = concatenate(outputs[1:])
    else:
        output = outputs[-1]
    if output_dropout > 0:
        output = Dropout(rate=output_dropout)(output)
    return output
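A minimal usage sketch for the encoder above (the embedding dimension and unit count are made up): wrap a variable-length embedded sequence and inspect the output width, which is num_layers * 2 * units when concat_layers=True.

from keras.layers import Input
from keras.models import Model

embedded = Input(shape=(None, 300))        # e.g. a pre-embedded question sequence
encoded = biLSTM_encoder2(embedded, units=128, num_layers=3, concat_layers=True)
Model(embedded, encoded).summary()         # last dim: 3 layers * 2 directions * 128 units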
def __init__(self, lstm_num_layers, lstm_layer_size, trainable_embeddings, **kw):
    """Initializes the Keras LSTM question processing component.
    Args:
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_layer_size: Dimensionality of each LSTM unit.
    Keyword Args:
        max_sentence_length: Maximum number of words to consider in each
            question, necessary at train time.
        bidirectional: Whether to use bidirectional LSTM layers.
    """
    print('Loading GloVe data... ', end='', flush=True)
    self._nlp = English()
    print('Done.')
    # embedding_dims = 300
    embeddings = get_embeddings(self._nlp.vocab)
    embedding_dims = embeddings.shape[1]
    # TODO(Bernhard): Investigate how the LSTM parameters influence the
    # overall performance.
    self._max_len = kw.get('max_sentence_length', 15)
    self._bidirectional = kw.get('bidirectional', False)
    self._model = Sequential()
    shallow = lstm_num_layers == 1  # marks a one-layer LSTM
    if trainable_embeddings:
        # If embeddings are trainable we have to enforce CPU usage in order to not run
        # out of memory; this is device dependent.
        # TODO(Bernhard): preprocess questions and vocab and try if we can get rid of
        # enough words to make this run on GPU anyway.
        with tf.device("/cpu:0"):
            self._model.add(Embedding(embeddings.shape[0], embeddings.shape[1],
                                      input_length=self._max_len, trainable=True,
                                      weights=[embeddings]))
    else:
        # A non-trainable embedding layer can run on GPU without exhausting all the memory.
        self._model.add(Embedding(embeddings.shape[0], embeddings.shape[1],
                                  input_length=self._max_len, trainable=False,
                                  weights=[embeddings]))
    lstm = LSTM(output_dim=lstm_layer_size,
                return_sequences=not shallow,
                input_shape=(self._max_len, embedding_dims))
    if self._bidirectional:
        lstm = Bidirectional(lstm)
    self._model.add(lstm)
    if not shallow:
        for i in range(lstm_num_layers - 2):
            lstm = LSTM(output_dim=lstm_layer_size, return_sequences=True)
            if self._bidirectional:
                lstm = Bidirectional(lstm)
            self._model.add(lstm)
        lstm = LSTM(output_dim=lstm_layer_size, return_sequences=False)
        if self._bidirectional:
            lstm = Bidirectional(lstm)
        self._model.add(lstm)
def modelConstruction(self):
    """
    Construct the melody and rhythm models.
    """
    # self.melody.add(GRU(128, consume_less='mem', return_sequences=True,
    #                     input_shape=(self.timestep, 56)))
    # # self.melody.add(Dropout(0.5))
    # self.rhythm.add(GRU(128, consume_less='mem', return_sequences=True,
    #                     input_shape=(self.timestep, 56)))
    # self.rhythm.add(Dropout(0.5))
    #
    # for i in range(2):
    #     self.melody.add(GRU(128, return_sequences=True))
    #     # self.melody.add(Dropout(0.5))
    #     self.rhythm.add(GRU(128, return_sequences=True))
    #     self.rhythm.add(Dropout(0.5))
    self.melody.add(Dense(128, input_shape=(self.timestep, 56)))
    self.rhythm.add(Dense(128, input_shape=(self.timestep, 56)))
    for i in range(4):
        self.melody.add(Bidirectional(LSTM(128, return_sequences=True)))
        self.melody.add(Dropout(0.5))
        self.rhythm.add(GRU(128, return_sequences=True))
        # self.rhythm.add(Dropout(0.5))
    self.melody.add(Dense(128, input_shape=(self.timestep, 56)))
    self.rhythm.add(Dense(128, input_shape=(self.timestep, 56)))
    # for i in range(3):
    #     self.melody.add(Bidirectional(GRU(128, return_sequences=True)))
    #     self.melody.add(Dropout(0.5))
    #     self.rhythm.add(GRU(128, return_sequences=True))
    self.melody.add(Dense(33, activation='softmax'))
    self.rhythm.add(Dense(23, activation='softmax'))
    # compile part
    self.melody.compile(optimizer='adam',
                        # loss=self.my_loss_function()
                        loss='categorical_crossentropy')
    self.rhythm.compile(optimizer='adam',
                        # loss=self.my_loss_function()
                        loss='categorical_crossentropy')
def sbrt2017(num_hiddens, var_dropout, dropout, weight_decay, num_features=39,
             num_classes=28):
    """SBRT model.
    Reference:
        [1] Gal, Y., "A Theoretically Grounded Application of Dropout in
            Recurrent Neural Networks", 2015.
        [2] Graves, Alex, Abdel-rahman Mohamed, and Geoffrey Hinton, "Speech
            recognition with deep recurrent neural networks", 2013.
        [6] Wu, Yonghui, et al., "Google's Neural Machine Translation System:
            Bridging the Gap between Human and Machine Translation", 2016.
    """
    x = Input(name='inputs', shape=(None, num_features))
    o = x
    if dropout > 0.0:
        o = Dropout(dropout)(o)
    o = Bidirectional(LSTM(num_hiddens,
                           return_sequences=True,
                           W_regularizer=l2(weight_decay),
                           U_regularizer=l2(weight_decay),
                           dropout_W=var_dropout,
                           dropout_U=var_dropout,
                           consume_less='gpu'))(o)
    if dropout > 0.0:
        o = Dropout(dropout)(o)
    o = TimeDistributed(Dense(num_classes,
                              W_regularizer=l2(weight_decay)))(o)
    # Define placeholders
    labels = Input(name='labels', shape=(None,), dtype='int32', sparse=True)
    inputs_length = Input(name='inputs_length', shape=(None,), dtype='int32')
    # Define a decoder
    dec = Lambda(decode, output_shape=decode_output_shape,
                 arguments={'is_greedy': True}, name='decoder')
    y_pred = dec([o, inputs_length])
    ctc = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")
    # Define loss as a layer
    loss = ctc([o, labels, inputs_length])
    return Model(input=[x, labels, inputs_length], output=[loss, y_pred])
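sbrt2017 depends on module-level helpers (decode, decode_output_shape, ctc_lambda_func) and on l2 from keras.regularizers, none of which are shown here. Assuming those are importable from the original project, a hedged construction sketch with purely illustrative hyperparameters:

# Illustrative only: the CTC decoder/loss helpers must come from the original module,
# and the hyperparameter values below are placeholders.
model = sbrt2017(num_hiddens=256, var_dropout=0.2, dropout=0.2, weight_decay=1e-4)
model.summary()
# Training code in this "CTC loss as a layer" style typically compiles with a
# pass-through loss on the 'ctc' output, e.g.:
#   model.compile(optimizer='sgd', loss={'ctc': lambda y_true, y_pred: y_pred})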