def create_model(self, ret_model = False):
image_model = Sequential()
image_model.add(Dense(EMBEDDING_DIM, input_dim = 4096, activation='relu'))
image_model.add(RepeatVector(self.max_length))
lang_model = Sequential()
lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_length))
lang_model.add(LSTM(256,return_sequences=True))
lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))
model = Sequential()
model.add(Merge([image_model, lang_model], mode='concat'))
model.add(LSTM(1000,return_sequences=False))
model.add(Dense(self.vocab_size))
model.add(Activation('softmax'))
print ("Model created!")
if(ret_model==True):
return model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
return model
Python TimeDistributed() class usage examples (source code)
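Before the collected snippets, here is a minimal, self-contained sketch of what TimeDistributed does (it applies the same inner layer to every timestep of a 3D input); the shapes below are illustrative and not taken from any of the examples:

from keras.models import Sequential
from keras.layers import Dense, TimeDistributed

# Apply one shared Dense(8) to each of the 10 timesteps of a 16-dimensional input:
# (batch, 10, 16) -> (batch, 10, 8), with a single set of weights reused across timesteps.
model = Sequential()
model.add(TimeDistributed(Dense(8), input_shape=(10, 16)))
model.compile(loss='mse', optimizer='adam')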
def __call__(self, inputs):
x = self._merge_inputs(inputs)
shape = getattr(x, '_keras_shape')
replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
x = kl.TimeDistributed(replicate_model)(x)
kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
x = kl.Bidirectional(kl.GRU(128, kernel_regularizer=kernel_regularizer,
return_sequences=True),
merge_mode='concat')(x)
kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
x = kl.Bidirectional(gru)(x)
x = kl.Dropout(self.dropout)(x)
return self._build(inputs, x)
def simple_cnn(agent, env, dropout=0, learning_rate=1e-3, **args):
with tf.device("/cpu:0"):
state = tf.placeholder('float', [None, agent.input_dim])
S = Input(shape=[agent.input_dim])
h = Reshape( agent.input_dim_orig )(S)
h = TimeDistributed( Convolution2D(16, 8, 8, subsample=(4, 4), border_mode='same', activation='relu', dim_ordering='tf'))(h)
# h = Dropout(dropout)(h)
h = TimeDistributed( Convolution2D(32, 4, 4, subsample=(2, 2), border_mode='same', activation='relu', dim_ordering='tf'))(h)
h = Flatten()(h)
# h = Dropout(dropout)(h)
h = Dense(256, activation='relu')(h)
# h = Dropout(dropout)(h)
h = Dense(128, activation='relu')(h)
V = Dense(env.action_space.n, activation='linear',init='zero')(h)
model = Model(S, V)
model.compile(loss='mse', optimizer=RMSprop(lr=learning_rate) )
return state, model
def answer_start_pred(context_encoding, question_attention_vector, context_mask, W, dropout_rate):
"""Answer start prediction layer."""
answer_start = Lambda(lambda arg:
concatenate([arg[0], arg[1], arg[2]]))([
context_encoding,
question_attention_vector,
multiply([context_encoding, question_attention_vector])])
answer_start = TimeDistributed(Dense(W, activation='relu'))(answer_start)
answer_start = Dropout(rate=dropout_rate)(answer_start)
answer_start = TimeDistributed(Dense(1))(answer_start)
# apply masking
answer_start = Lambda(lambda q: masked_softmax(q[0], q[1]))([answer_start, context_mask])
answer_start = Lambda(lambda q: flatten(q))(answer_start)
return answer_start
def classifier(base_layers, input_rois, batch_size, nb_classes = 3, trainable=False):
# compile times tend to be very high, so we use smaller ROI pooling regions as a workaround
if K.backend() == 'tensorflow':
pooling_regions = 14
input_shape = (batch_size,14,14,2048)
elif K.backend() == 'theano':
pooling_regions = 7
input_shape = (batch_size,2048,7,7)
out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])
out = TimeDistributed(Flatten())(out_roi_pool)
# out = TimeDistributed(Dropout(0.4))(out)
# out = TimeDistributed(Dense(2048,activation='relu'))(out)
out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
def classifier(base_layers, input_rois, batch_size, nb_classes = 3, trainable=False):
# compile times tend to be very high, so we use smaller ROI pooling regions as a workaround
if K.backend() == 'tensorflow':
pooling_regions = 14
input_shape = (batch_size,14,14,512)
elif K.backend() == 'theano':
pooling_regions = 7
input_shape = (batch_size,512,7,7)
out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])
out = TimeDistributed(Flatten())(out_roi_pool)
out = TimeDistributed(Dense(4096,activation='relu'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out = TimeDistributed(Dense(4096,activation='relu'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
def classifier(base_layers, input_rois, batch_size, nb_classes = 3, trainable=False):
# compile times tend to be very high, so we use smaller ROI pooling regions as a workaround
if K.backend() == 'tensorflow':
pooling_regions = 14
input_shape = (batch_size,14,14,1024)
elif K.backend() == 'theano':
pooling_regions = 7
input_shape = (batch_size,1024,7,7)
out_roi_pool = RoiPoolingConv(pooling_regions, batch_size)([base_layers, input_rois])
out = TimeDistributed(Flatten())(out_roi_pool)
out = TimeDistributed(Dense(4096,activation='relu'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out = TimeDistributed(Dense(4096,activation='relu'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * nb_classes, activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):
# compile times on theano tend to be very high, so we use smaller ROI pooling regions as a workaround
if K.backend() == 'tensorflow':
pooling_regions = 7
input_shape = (num_rois,7,7,512)
elif K.backend() == 'theano':
pooling_regions = 7
input_shape = (num_rois,512,7,7)
out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)
out = TimeDistributed(Dropout(0.5))(out)
out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
# note: no regression target for bg class
out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
def build(self) -> None:
"""
?????????? ??????.
"""
inp = Input(shape=(None,))
emb = Embedding(len(self.grapheme_alphabet), self.emb_dimension)(inp)
encoded = Bidirectional(self.rnn(self.units1, return_sequences=True, recurrent_dropout=self.dropout))(emb)
encoded = Dropout(self.dropout)(encoded)
decoded = TimeDistributed(Dense(self.units2, activation="relu"))(encoded)
predictions = TimeDistributed(Dense(len(self.phonetic_alphabet), activation="softmax"))(decoded)
model = Model(inputs=inp, outputs=predictions)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
self.model = model
def build(self) -> None:
"""
?????????? ??????.
"""
inp = Input(shape=(None,))
emb = Embedding(len(self.grapheme_set), self.emb_dimension)(inp)
encoded = Bidirectional(self.rnn(self.units, return_sequences=True, recurrent_dropout=self.dropout))(emb)
encoded = Dropout(self.dropout)(encoded)
decoded = TimeDistributed(Dense(self.units, activation="relu"))(encoded)
predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)
model = Model(inputs=inp, outputs=predictions)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
self.model = model
def build(self) -> None:
"""
?????????? ??????.
"""
inp = Input(shape=(None,))
emb = Embedding(len(self.phonetic_alphabet), self.emb_dimension)(inp)
encoded = Bidirectional(self.rnn(self.units, return_sequences=True, recurrent_dropout=self.dropout))(emb)
encoded = Dropout(self.dropout)(encoded)
decoded = Bidirectional(self.rnn(self.units, return_sequences=True, recurrent_dropout=self.dropout))(encoded)
decoded = Dropout(self.dropout)(decoded)
predictions = TimeDistributed(Dense(3, activation="softmax"))(decoded)
model = Model(inputs=inp, outputs=predictions)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
self.model = model
def prep_model(inputs, N, s0pad, s1pad, c):
# Word-level projection before averaging
inputs[0] = TimeDistributed(Dense(N, activation='relu'))(inputs[0])
inputs[0] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[0])
inputs[1] = TimeDistributed(Dense(N, activation='relu'))(inputs[1])
inputs[1] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[1])
merged = concatenate([inputs[0], inputs[1]])
# Deep
for i in range(c['deep']):
merged = Dense(c['nndim'], activation=c['nnact'])(merged)
merged = Dropout(c['nndropout'])(merged)
merged = BatchNormalization()(merged)
is_duplicate = Dense(1, activation='sigmoid')(merged)
return [is_duplicate], N
def prep_model(inputs, N, s0pad, s1pad, c):
# Word-level projection before averaging
inputs[0] = TimeDistributed(Dense(N, activation='relu'))(inputs[0])
inputs[0] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[0])
inputs[1] = TimeDistributed(Dense(N, activation='relu'))(inputs[1])
inputs[1] = Lambda(lambda x: K.max(x, axis=1), output_shape=(N, ))(inputs[1])
merged = concatenate([inputs[0], inputs[1]])
# Deep
for i in range(c['deep']):
merged = Dense(c['nndim'], activation=c['nnact'])(merged)
merged = Dropout(c['nndropout'])(merged)
merged = BatchNormalization()(merged)
is_duplicate = Dense(1, activation='sigmoid')(merged)
return [is_duplicate], N
def build(self, inputs_shape):
# Import dimensions
(max_atoms, max_degree, num_atom_features, num_bond_features,
num_samples) = mol_shapes_to_dims(mol_shapes=inputs_shape)
# Add the dense layer that contains the trainable parameters
# Initialise dense layer with specified params (kwargs) and name
inner_layer = self.create_inner_layer_fn()
inner_layer_type = inner_layer.__class__.__name__.lower()
inner_layer.name = self.name + '_inner_'+ inner_layer_type
# Initialise TimeDistributed layer wrapper in order to parallelise
# dense layer across atoms
inner_3D_layer_name = self.name + '_inner_timedistributed'
self.inner_3D_layer = layers.TimeDistributed(inner_layer, name=inner_3D_layer_name)
# Build the TimeDistributed layer (which will build the Dense layer)
self.inner_3D_layer.build((None, max_atoms, num_atom_features+num_bond_features))
# Store the dense 3D layer and its weights
self.trainable_weights = self.inner_3D_layer.trainable_weights
def ResidualBlock1D_helper(layers, kernel_size, filters, final_stride=1):
def f(_input):
basic = _input
for ln in range(layers):
#basic = BatchNormalization()( basic ) # triggers known keras bug w/ TimeDistributed: https://github.com/fchollet/keras/issues/5221
basic = ELU()(basic)
basic = Conv1D(filters, kernel_size, kernel_initializer='he_normal',
kernel_regularizer=l2(1.e-4), padding='same')(basic)
# note that this strides without averaging
return AveragePooling1D(pool_size=1, strides=final_stride)(Add()([_input, basic]))
return f
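A minimal usage sketch for the helper above (the names and shapes here are illustrative assumptions, not from the original repository); note that filters must equal the input channel count so the Add() skip connection is shape-compatible:

from keras.layers import Input
from keras.models import Model

# Hypothetical example: a two-layer pre-activation residual block over a
# (batch, timesteps, 64) tensor; padding='same' and filters=64 preserve the shape,
# so Add()([_input, basic]) inside the helper is valid.
inp = Input(shape=(None, 64))
out = ResidualBlock1D_helper(layers=2, kernel_size=3, filters=64)(inp)
model = Model(inp, out)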
def classifier_layers(x, input_shape, stage_num, trainable=False):
# compile times on theano tend to be very high, so we use smaller ROI pooling regions as a workaround
# (hence a smaller stride in the region that follows the ROI pool)
if K.backend() == 'tensorflow':
x = conv_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='a', input_shape=input_shape, strides=(1, 2), trainable=trainable)
elif K.backend() == 'theano':
x = conv_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='a', input_shape=input_shape, strides=(1, 1), trainable=trainable)
print('INFO: Classifier layers x block a: ', x)
x = identity_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='c', trainable=trainable)
print('INFO: Classifier layers x block b: ', x)
x = identity_block_td(x, 3, [512, 512, 1024], stage=stage_num, block='d', trainable=trainable)
print('INFO: Classifier layers x block c: ', x)
#x = TimeDistributed(AveragePooling2D((2, 1)), name='avg_pool')(x)
return x
def BidirLSTM(n_nodes, n_classes, n_feat, max_len=None,
causal=True, loss='categorical_crossentropy', optimizer="adam",
return_param_str=False):
inputs = Input(shape=(None,n_feat))
model = LSTM(n_nodes, return_sequences=True)(inputs)
# Bidirectional LSTM
if not causal:
model_backwards = LSTM(n_nodes, return_sequences=True, go_backwards=True)(inputs)
model = Merge(mode="concat")([model, model_backwards])
model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)
model = Model(input=inputs, output=model)
model.compile(optimizer=optimizer, loss=loss, sample_weight_mode="temporal", metrics=['accuracy'])
if return_param_str:
param_str = "LSTM_N{}".format(n_nodes)
if causal:
param_str += "_causal"
return model, param_str
else:
return model
def global_handle(self, emb_layer, flag):
fw_lstm_out = self.forward_lstm(emb_layer)
bw_lstm_out = self.backward_lstm(emb_layer)
conv_out = self.conv_dropout(self.conv(emb_layer))
fw_lstm_out = TimeDistributed(Dense(self.params['attention_dim']), name='fw_tb_'+flag)(fw_lstm_out)
fw_lstm_att = Attention()(fw_lstm_out)
# fw_lstm_att = Reshape((self.params['lstm_output_dim'], 1))(fw_lstm_att)
conv_out = TimeDistributed(Dense(self.params['attention_dim']), name='conv_tb_'+flag)(conv_out)
conv_att = Attention()(conv_out)
# conv_att = Reshape((self.params['filters'], 1))(conv_att)
bw_lstm_out = TimeDistributed(Dense(self.params['attention_dim']), name='bw_tb_'+flag)(bw_lstm_out)
bw_lstm_att = Attention()(bw_lstm_out)
# bw_lstm_att = Reshape((self.params['lstm_output_dim'], 1))(bw_lstm_att)
return concatenate([fw_lstm_att, conv_att, bw_lstm_att], axis=2)
def SimpleRecurrentModel(params):
model = Sequential()
# Incorporating leakiness in the neurons
model.add(leak_recurrent(input_dim=2, output_dim=params['N_rec'], return_sequences=True, activation='relu',
noise=params['rec_noise'], consume_less='mem', tau=params['tau'], dale_ratio=params['dale_ratio']))
# Before going directly to the output, we apply a relu to the signal FIRST and THEN sum THOSE signals
# So this is the difference between W * [x]_+ (what we want) and [W * x]_+ (what we would have gotten)
model.add(Activation('relu'))
# Output neuron
model.add(TimeDistributed(dense_output_with_mask(output_dim=1, activation='linear', dale_ratio=params['dale_ratio'],
input_dim=params['N_rec'])))
# Using mse, as in Daniel's example; for some reason training is slow when using binary_crossentropy
model.compile(loss = 'mse', optimizer='Adam', sample_weight_mode="temporal")
return model
def arch_attention(embedding_layer, sequence_length, classes):
tweet_input = Input(shape=(sequence_length,), dtype='int32')
embedded_tweet = embedding_layer(tweet_input)
activations = LSTM(128, return_sequences=True, name='recurrent_layer')(embedded_tweet)
attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(128)(attention)
attention = Permute([2, 1], name='attention_layer')(attention)
sent_representation = merge([activations, attention], mode='mul')
sent_representation = Lambda(lambda xin: K.sum(xin, axis=1), name='merged_layer')(sent_representation)
tweet_output = Dense(classes, activation='softmax', name='predictions')(sent_representation)
tweetnet = Model(tweet_input, tweet_output)
tweetnet.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
return tweetnet
def arch_attention36(embedding_layer, sequence_length, classes):
tweet_input = Input(shape=(sequence_length,), dtype='int32')
embedded_tweet = embedding_layer(tweet_input)
activations = LSTM(36, return_sequences=True, name='recurrent_layer')(embedded_tweet)
attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(36)(attention)
attention = Permute([2, 1], name='attention_layer')(attention)
sent_representation = merge([activations, attention], mode='mul')
sent_representation = Lambda(lambda xin: K.sum(xin, axis=1), name='merged_layer')(sent_representation)
tweet_output = Dense(classes, activation='softmax', name='output_layer')(sent_representation)
tweetnet = Model(tweet_input, tweet_output)
tweetnet.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
return tweetnet
def BiGRU(X_train, y_train, X_test, y_test, gru_units, dense_units, input_shape, \
batch_size, epochs, drop_out, patience):
model = Sequential()
reg = L1L2(l1=0.2, l2=0.2)
model.add(Bidirectional(GRU(units = gru_units, dropout= drop_out, activation='relu', recurrent_regularizer = reg,
return_sequences = True),
input_shape = input_shape,
merge_mode="concat"))
model.add(BatchNormalization())
model.add(TimeDistributed(Dense(dense_units, activation='relu')))
model.add(BatchNormalization())
model.add(Bidirectional(GRU(units = gru_units, dropout= drop_out, activation='relu', recurrent_regularizer=reg,
return_sequences = True),
merge_mode="concat"))
model.add(BatchNormalization())
model.add(Dense(units=1))
model.add(GlobalAveragePooling1D())
print(model.summary())
early_stopping = EarlyStopping(monitor="val_loss", patience = patience)
model.compile(loss='mse', optimizer= 'adam')
history_callback = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,\
verbose=2, callbacks=[early_stopping], validation_data=[X_test, y_test], shuffle = True)
return model, history_callback
def generate_model(output_len, chars=None):
"""Generate the model"""
print('Build model...')
chars = chars or CHARS
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
for layer_number in range(INPUT_LAYERS):
model.add(recurrent.LSTM(HIDDEN_SIZE, input_shape=(None, len(chars)), init=INITIALIZATION,
return_sequences=layer_number + 1 < INPUT_LAYERS))
model.add(Dropout(AMOUNT_OF_DROPOUT))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(output_len))
# The decoder RNN could be multiple layers stacked or a single layer
for _ in range(OUTPUT_LAYERS):
model.add(recurrent.LSTM(HIDDEN_SIZE, return_sequences=True, init=INITIALIZATION))
model.add(Dropout(AMOUNT_OF_DROPOUT))
# For each step of the output sequence, decide which character should be chosen
model.add(TimeDistributed(Dense(len(chars), init=INITIALIZATION)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
def generate_model(args, nb_features, input_length, nb_repeats=1):
"""
Generate the model.
"""
emb_weights = np.eye(nb_features)
model = Sequential()
model.add(Embedding(input_dim=nb_features, output_dim=nb_features, input_length=input_length,
weights=[emb_weights], trainable=False))
for layer_id in range(args.input_layers):
model.add(args.cell_type(args.hidden_layers,
return_sequences=layer_id + 1 < args.input_layers))
model.add(Dropout(args.dropout))
model.add(RepeatVector(nb_repeats))
for _ in range(args.output_layers):
model.add(args.cell_type(args.hidden_layers, return_sequences=True))
model.add(Dropout(args.dropout))
model.add(TimeDistributed(Dense(nb_features)))
model.add(Activation("softmax"))
model.compile(loss="sparse_categorical_crossentropy",
optimizer=args.optimizer,
metrics=["accuracy"])
return model
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name, maxlen):
db = read_data(path_test)
X = create_sequences(db, maxlen, maxlen)
y = create_sequences(db, maxlen, maxlen)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))
y = np.reshape(y, (y.shape[0], y.shape[1], 1))
# build the model: 1 layer LSTM
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(maxlen))
# The decoder RNN could be multiple layers stacked or a single layer
model.add(LSTM(hidden_size, return_sequences=True))
# For each step of the output sequence, decide which character should be chosen
model.add(TimeDistributed(Dense(1)))
model.load_weights(save_dir + model_name)
model.compile(loss='mae', optimizer='adam')
model.summary()
prediction = model.predict(X, batch_size, verbose=1, )
prediction = prediction.flatten()
# prediction_container = np.array(prediction).flatten()
plt.plot(prediction.flatten()[:4000], label='prediction')
plt.plot(y.flatten()[maxlen:4000 + maxlen], label='true')
plt.legend()
plt.show()
store_prediction_and_ground_truth(model)
def train_normal_model(path_train, input_size, hidden_size, batch_size, early_stopping_patience, val_percentage,
save_dir, model_name, maxlen):
if not os.path.exists(save_dir):
os.mkdir(save_dir)
db = read_data(path_train)
train_x = db[:-maxlen]
train_y = db[maxlen:]
X = create_sequences(train_x, maxlen, maxlen)
y = create_sequences(train_y, maxlen, maxlen)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))
y = np.reshape(y, (y.shape[0], y.shape[1], 1))
#
# preparing the callbacks
check_pointer = callbacks.ModelCheckpoint(filepath=save_dir + model_name, verbose=1, save_best_only=True)
early_stop = callbacks.EarlyStopping(patience=early_stopping_patience, verbose=1)
# build the model: 1 layer LSTM
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(maxlen))
# The decoder RNN could be multiple layers stacked or a single layer
model.add(LSTM(hidden_size, return_sequences=True))
# For each step of the output sequence, decide which character should be chosen
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mae', optimizer='adam')
model.summary()
model.fit(X, y, batch_size=batch_size, nb_epoch=50, validation_split=val_percentage,
callbacks=[check_pointer, early_stop])
return model
def ann_rnn(input_shape, n_classes):
"""
for working with extracted features
"""
model = Sequential(name='ann_rnn')
model.add(TimeDistributed(Dense (80, activation='elu', kernel_initializer='he_normal'), input_shape=input_shape))
model.add(BatchNormalization())
model.add(Dropout(0.35))
model.add(TimeDistributed(Dense (80, activation='elu', kernel_initializer='he_normal')))
model.add(BatchNormalization())
model.add(Dropout(0.35))
model.add(LSTM(50))
model.add(Dense(n_classes, activation = 'softmax'))
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=[keras.metrics.categorical_accuracy])
return model
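A minimal usage sketch for ann_rnn (the input is assumed to be a sequence of pre-extracted feature vectors per sample; the shape, class count, and data names below are illustrative):

# Hypothetical example: 30 timesteps of 120-dimensional feature vectors per sample.
model = ann_rnn(input_shape=(30, 120), n_classes=5)
# X_train: (n_samples, 30, 120); y_train: one-hot labels of shape (n_samples, 5)
model.fit(X_train, y_train, batch_size=64, epochs=20, validation_split=0.1)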
def __init__(self, rnn_dim, rnn_unit='gru', input_shape=(0,),
dropout=0.0, highway=False, return_sequences=False,
dense_dim=0):
if rnn_unit == 'gru':
rnn = GRU
else:
rnn = LSTM
self.model = Sequential()
self.model.add(
Bidirectional(rnn(rnn_dim,
dropout=dropout,
recurrent_dropout=dropout,
return_sequences=return_sequences),
input_shape=input_shape))
# self.model.add(rnn(rnn_dim,
# dropout=dropout,
# recurrent_dropout=dropout,
# return_sequences=return_sequences,
# input_shape=input_shape))
if highway:
if return_sequences:
self.model.add(TimeDistributed(Highway(activation='tanh')))
else:
self.model.add(Highway(activation='tanh'))
if dense_dim > 0:
self.model.add(TimeDistributed(Dense(dense_dim,
activation='relu')))
self.model.add(TimeDistributed(Dropout(dropout)))
self.model.add(TimeDistributed(BatchNormalization()))
def fhan2_max(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix], mask_zero=False, trainable=True, name='wordEmbedding')(wordInputs)
hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
Si = GlobalMaxPooling1D()(hij)
wordEncoder = Model(wordInputs, Si)
# -----------------------------------------------------------------------------------------------
docInputs = Input(shape=(None, MAX_WORDS), name='docInputs' ,dtype='float32')
#sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(docInputs)
hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
Vb = GlobalMaxPooling1D()(hi)
v6 = Dense(1, activation="sigmoid", kernel_initializer = 'glorot_uniform', name="dense")(Vb)
model = Model(inputs=[docInputs] , outputs=[v6])
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
return model, wordEncoder
def han2(MAX_NB_WORDS, MAX_WORDS, MAX_SENTS, EMBEDDING_DIM, WORDGRU, embedding_matrix, DROPOUTPER):
wordInputs = Input(shape=(MAX_WORDS,), name="wordInputs", dtype='float32')
#print 'in han2 max-nb-words'
#print MAX_NB_WORDS
wordEmbedding = Embedding(MAX_NB_WORDS, EMBEDDING_DIM, weights=[embedding_matrix], mask_zero=True, trainable=True, name='wordEmbedding')(wordInputs)
hij = Bidirectional(GRU(WORDGRU, return_sequences=True), name='gru1')(wordEmbedding)
alpha_its, Si = AttentionLayer(name='att1')(hij)
#wordDrop = Dropout(DROPOUTPER, name='wordDrop')(Si)
wordEncoder = Model(wordInputs, Si)
# -----------------------------------------------------------------------------------------------
docInputs = Input(shape=(None, MAX_WORDS), name='docInputs' ,dtype='float32')
sentenceMasking = Masking(mask_value=0.0, name='sentenceMasking')(docInputs)
sentEncoding = TimeDistributed(wordEncoder, name='sentEncoding')(sentenceMasking)
hi = Bidirectional(GRU(WORDGRU, return_sequences=True), merge_mode='concat', name='gru2')(sentEncoding)
alpha_s, Vb = AttentionLayer(name='att2')(hi)
#sentDrop = Dropout(DROPOUTPER, name='sentDrop')(Vb)
v6 = Dense(1, activation="sigmoid", kernel_initializer = 'he_normal', name="dense")(Vb)
model = Model(inputs=[docInputs] , outputs=[v6])
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
return model, wordEncoder