def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):
    input = Input(shape=(lembedding.size,), dtype='int32')
    if lembedding.vector_box.W is None:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        W_constraint=None)(input)
    else:
        emb = Embedding(lembedding.vector_box.size,
                        lembedding.vector_box.vector_dim,
                        weights=[lembedding.vector_box.W], W_constraint=None)(input)
    emb.trainable = train_vectors

    if unit == 'gru':
        forward = GRU(rnn_size)(emb)
        backward = GRU(rnn_size, go_backwards=True)(emb)
    else:
        forward = LSTM(rnn_size)(emb)
        backward = LSTM(rnn_size, go_backwards=True)(emb)

    merged_rnn = merge([forward, backward], mode='concat')
    dropped = Dropout(0.5)(merged_rnn)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def __init__(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
             nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True,
             optimizer=None):
    if not isinstance(lembedding, TwoLevelsEmbedding):
        raise LanguageClassifierException(
            "The model only accepts two-level language embeddings")
    if num_classes < 2:
        raise LanguageClassifierException("Classes must be 2 or more")

    self.optimizer = optimizer
    model = self._generate_model(lembedding, num_classes, ngrams,
                                 nfilters, rnn_type, rnn_dim, train_vectors)
    super(RCNNClassifier, self).__init__(model, self.optimizer)
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
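A minimal usage sketch (an assumption, not part of the test file above): a test decorated with rnn_test receives each recurrent class in turn through pytest's parametrization, so a single test body exercises SimpleRNN, GRU, and LSTM. The test name and hidden size below are illustrative only.

@rnn_test
def test_layer_construction(layer_class):
    # layer_class is one of recurrent.SimpleRNN, recurrent.GRU, recurrent.LSTM
    layer = layer_class(8, return_sequences=True)
    assert layer.return_sequences is True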
def build_lstm(input_shape):
    model = Sequential()
    model.add(Masking(input_shape=input_shape, mask_value=-1.))
    # model.add(GRU(128, return_sequences=True))
    model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def test_gru(self):
    _runner(recurrent.GRU)
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(3, 5), output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
    model.compile(loss='hinge', optimizer='adam')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.history['val_loss'][-1] < 0.8)
def test_temporal_reg(self):
    print('temporal regression data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200,
                                                         input_shape=(5, 10), output_shape=(2,),
                                                         classification=False)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    model = Sequential()
    model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
    model.compile(loss='hinge', optimizer='rmsprop')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test), verbose=2)
    self.assertTrue(history.validation_loss[-1] < 0.75)
def LSTMModel(self, nHidden=150, lr=0.01):
    # print('nHidden: %i\tlr: %.3f' % (nHidden, lr))
    self.rnnModel.add(GRU(nHidden, activation='sigmoid',
                          input_shape=(None, self.maxFeatures), return_sequences=True))
    # self.rnnModel.add(LSTM(nHidden, activation='sigmoid', input_shape=(None, nHidden), return_sequences=True))
    self.rnnModel.add(TimeDistributedDense(nHidden))
    self.rnnModel.add(Activation('relu'))
    self.rnnModel.add(TimeDistributedDense(self.maxFeatures))
    self.rnnModel.add(Activation('softmax'))
    rmsprop = RMSprop(lr=lr, rho=0.9, epsilon=1e-06)
    self.rnnModel.compile(loss='categorical_crossentropy', optimizer=rmsprop)
def BuildModel():
    # global dataset_storage, model_storage, history_storage
    dataset = makeData(Variables=o.Variables)
    # dataset_storage = dataset
    model = None
    history = None
    modelname = ""
    print o.Model

    if "LSTM" in o.Model or "GRU" in o.Model:
        model, history = buildModel_1hidden(dataset, True)
    if o.Model == "RNNSV1":
        model, history = buildModel_RNNSV1(dataset, True)
    if o.Model == "DenseIP3D":
        model, history = buildModel_SimpleDense(dataset, False)
    print ' ------------------------------------------'
    print o.Model
    if o.Model == "RNNPlusMV2" or o.Model == "RNNPlusSV1":
        model, history = buildModel_RNNPlus(dataset, useAdam=True)

    modelname = (o.Version + "_" + o.Model + "_" + o.Variables + "_" + o.nEpoch + "epoch_" +
                 str(n_events/1000) + 'kEvts_' + str(o.nTrackCut) + 'nTrackCut_' +
                 o.nMaxTrack + "nMaxTrack_" + o.nLSTMClass + "nLSTMClass_" +
                 o.nLSTMNodes + "nLSTMNodes_" + o.nLayers + "nLayers")
    model = evalModel(dataset, model, o.Model)

    if o.TrackOrder == 'pT':
        modelname += "_SortpT"
    if o.TrackOrder == 'Reverse':
        modelname += "_ReverseOrder"
    if o.TrackOrder == 'SL0':
        modelname += "_SL0"
    if o.doTrainC == 'y':
        modelname += "_CMix"
    if o.AddJetpT == 'y':
        modelname += '_AddJetpT'
    if int(o.EmbedSize) != 2:
        modelname += "_" + o.EmbedSize + "EmbedSize"
    if o.Mode == "R":
        modelname = o.filebase + "_Retrain_" + o.nEpoch
    if o.doLessC == "y":
        modelname += "_LessC"
    if o.doJetpTReweight == "y":
        modelname += "_JetpTReweight"
    # modelname = "test"
    saveModel(modelname, model, history)
def train_breaker(datafilename, sentence_num=1000, puncs=u',?.?!???',
                  RNN=recurrent.GRU, HIDDEN_SIZE=128, EPOCH_SIZE=10, validate=True):
    wordtable = WordTable()
    wordtable.parse(datafilename, sentence_num)

    X, Y = [], []
    for line in open(datafilename).readlines()[:sentence_num]:
        line = line.strip().decode('utf-8')
        line = re.sub(ur'(^[{0}]+)|([{0}]+$)'.format(puncs), '', line)
        words = wordtable.encode(re.sub(ur'[{0}]'.format(puncs), '', line))
        breaks = re.sub(ur'0[{0}]+'.format(puncs), '1', re.sub(ur'[^{0}]'.format(puncs), '0', line))
        if len(words) >= 30 and len(words) <= 50 and breaks.count('1') >= 4:
            x = np.zeros((len(words), wordtable.capacity), dtype=np.bool)
            y = np.zeros((len(breaks), 2), dtype=np.bool)
            for idx in xrange(len(words)):
                x[idx][words[idx]] = True
                y[idx][int(breaks[idx])] = True
            X.append(x)
            Y.append(y)
    print 'total sentence: ', len(X)

    if validate:
        # Set apart 10% for validation
        split_at = len(X) - len(X)/10
        X_train, X_val = X[:split_at], X[split_at:]
        y_train, y_val = Y[:split_at], Y[split_at:]
    else:
        X_train, y_train = X, Y

    model = Graph()
    model.add_input(name='input', input_shape=(None, wordtable.capacity))
    model.add_node(RNN(HIDDEN_SIZE, return_sequences=True), name='forward', input='input')
    model.add_node(TimeDistributedDense(2, activation='softmax'), name='softmax', input='forward')
    model.add_output(name='output', input='softmax')
    model.compile('adam', {'output': 'categorical_crossentropy'})

    for epoch in xrange(EPOCH_SIZE):
        print "epoch: ", epoch
        for idx, (seq, label) in enumerate(zip(X_train, y_train)):
            loss, accuracy = model.train_on_batch({'input': np.array([seq]), 'output': np.array([label])}, accuracy=True)
            if idx % 20 == 0:
                print "\tidx={0}, loss={1}, accuracy={2}".format(idx, loss, accuracy)
        if validate:
            _Y, _P = [], []
            for (seq, label) in zip(X_val, y_val):
                y = label.argmax(axis=-1)
                p = model.predict({'input': np.array([seq])})['output'][0].argmax(axis=-1)
                _Y.extend(list(y))
                _P.extend(list(p))
            _Y, _P = np.array(_Y), np.array(_P)
            print "should break right: ", ((_P == 1)*(_Y == 1)).sum()
            print "should break wrong: ", ((_P == 0)*(_Y == 1)).sum()
            print "should not break right: ", ((_P == 0)*(_Y == 0)).sum()
            print "should not break wrong: ", ((_P == 1)*(_Y == 0)).sum()

    with open('wordtable_json.txt', 'w') as wordtable_file:
        wordtable_file.write(wordtable.to_json())
    with open('model_json.txt', 'w') as model_file:
        model_file.write(model.to_json())
    model.save_weights('model_weights.h5', overwrite=True)
def get_state_transfer_rnn(RNN):
    '''Converts a given Recurrent subclass (e.g., LSTM, GRU) to its state-transferable version.
    A state transfer RNN can transfer its hidden state to another one of the same type and compatible dimensions.
    '''
    class StateTransferRNN(RNN):

        def __init__(self, state_input=True, **kwargs):
            self.state_outputs = []
            self.state_input = state_input
            super(StateTransferRNN, self).__init__(**kwargs)

        def reset_states(self):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            if self.stateful:
                super(StateTransferRNN, self).reset_states()
            self.stateful = stateful

        def build(self, input_shape):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            super(StateTransferRNN, self).build(input_shape)
            self.stateful = stateful

        def broadcast_state(self, rnns):
            rnns = (set if type(rnns) in [list, tuple] else lambda a: {a})(rnns)
            rnns -= set(self.state_outputs)
            self.state_outputs.extend(rnns)
            for rnn in rnns:
                rnn.state_input = self
                rnn.updates = getattr(rnn, 'updates', [])
                rnn.updates.extend(zip(rnn.states, self.states_to_transfer))

        def call(self, x, mask=None):
            last_output, outputs, states = K.rnn(
                self.step,
                self.preprocess_input(x),
                self.states or self.get_initial_states(x),
                go_backwards=self.go_backwards,
                mask=mask,
                constants=self.get_constants(x),
                unroll=self.unroll,
                input_length=self.input_spec[0].shape[1])
            self.updates = zip(self.states, states)
            self.states_to_transfer = states
            return outputs if self.return_sequences else last_output

    return StateTransferRNN
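A rough usage sketch under stated assumptions (the encoder/decoder names, layer sizes, and the Keras 1.x-style import and output_dim keyword are illustrative, not from the original source): get_state_transfer_rnn wraps an existing recurrent class, and broadcast_state is called after both layers have been applied in a model graph, so that the registered updates copy the donor's final states into the receiver.

from keras.layers.recurrent import GRU

StateTransferGRU = get_state_transfer_rnn(GRU)
encoder = StateTransferGRU(output_dim=128, state_input=False)      # donates its final hidden state
decoder = StateTransferGRU(output_dim=128, return_sequences=True)  # receives the encoder's state
# ... apply encoder and decoder to their input tensors to build the graph;
# calling encoder on its input populates encoder.states_to_transfer ...
# encoder.broadcast_state(decoder) then registers updates that write the
# encoder's final states into decoder.states at run time.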
def __init__(self, embedding_mat=None, maxlen_doc=7, maxlen_sent=50, filter_length=[3, 4, 5, 6],
             nb_filters=200, n_vocab=10000, embedding_dims=300, hidden_gru=64, n_classes=5):
    # store the pretrained embedding matrix; it is referenced below when building the Embedding layer
    self.embedding_mat = embedding_mat
    if embedding_mat is not None:
        self.n_vocab, self.embedding_dims = embedding_mat.shape
    else:
        self.n_vocab = n_vocab
        self.embedding_dims = embedding_dims
    self.maxlen_doc = maxlen_doc
    self.maxlen_sent = maxlen_sent
    self.filter_length = filter_length
    self.nb_filters = nb_filters
    self.hidden_gru = hidden_gru
    print "Building the model"
    # graph model
    model = Graph()
    model.add_input(name='input', input_shape=(self.maxlen_doc*self.maxlen_sent,), dtype='int')

    # Model embedding layer, for word index -> word embedding transformation
    model.add_node(Embedding(self.n_vocab, self.embedding_dims, weights=[self.embedding_mat],
                             input_length=self.maxlen_sent*self.maxlen_doc),
                   name='embedding', input='input')
    model.add_node(Reshape((self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                   name='reshape_5d', input='embedding')

    # define the different filters
    conv_layer = []
    for each_length in filter_length:
        model.add_node(TimeDistributedConvolution2D(self.nb_filters/len(filter_length),
                                                    each_length, self.embedding_dims, border_mode='valid',
                                                    input_shape=(self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                       name='conv_{}'.format(each_length), input='reshape_5d')
        model.add_node(Activation('relu'),
                       name='relu_conv_{}'.format(each_length), input='conv_{}'.format(each_length))
        model.add_node(TimeDistributedMaxPooling2D(pool_size=(int(self.maxlen_sent - each_length + 1), 1),
                                                   border_mode='valid'),
                       name='pool_conv_{}'.format(each_length), input='relu_conv_{}'.format(each_length))
        model.add_node(TimeDistributedFlatten(),
                       name='flatten_conv_{}'.format(each_length), input='pool_conv_{}'.format(each_length))
        conv_layer.append('flatten_conv_{}'.format(each_length))
    # model.add_node(Activation('relu'), name='relu', inputs=conv_layer)
    print conv_layer

    model.add_node(GRU(self.hidden_gru), name='gru_forward', inputs=conv_layer)
    model.add_node(GRU(self.hidden_gru, go_backwards=True), name='gru_backward', inputs=conv_layer)
    model.add_node(Dropout(0.5), name='gru_outputs', inputs=['gru_forward', 'gru_backward'])
    model.add_node(Dense(n_classes), name='full_con', input='gru_outputs')
    model.add_node(Activation('softmax'), name='prob', input='full_con')
    model.add_output(name='pred', input='prob')
    model.compile('rmsprop', loss={'pred': 'categorical_crossentropy'})
def _generate_model(self, lembedding, num_classes=2, rnn_dim=32):
    WORD_PER_SENTENCES = lembedding.size_level1
    SENTENCES_PER_DOCUMENT = lembedding.size_level2
    EMBEDDING_DIM = lembedding.vector_box.vector_dim

    INPUT_SHAPE = (WORD_PER_SENTENCES * SENTENCES_PER_DOCUMENT, )
    EMBEDDING_SHAPE = (SENTENCES_PER_DOCUMENT, WORD_PER_SENTENCES, EMBEDDING_DIM)

    doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

    embedded = Sequential([
        Embedding(
            input_dim=lembedding.vector_box.size,
            output_dim=EMBEDDING_DIM,
            input_length=INPUT_SHAPE[0]
        ),
        Reshape(EMBEDDING_SHAPE)
    ])(doc)

    out = TimeDistributed(GRU(rnn_dim))(embedded)
    next = Dropout(0.5)(out)
    out = GRU(rnn_dim)(next)
    out = Dropout(0.5)(out)

    mapping = [
        Dense(64, activation='relu'),  # Maybe add more layers
    ]
    for f in mapping:
        out = f(out)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def lstm_memory_train(X_train_list, y_train, vocab_size):
    N = len(X_train_list)
    X_train_list = [sequence.pad_sequences(x_train, maxlen=MAX_LEN) for x_train in X_train_list]

    input_list = []
    out_list = []
    for i in range(N):
        input, out = get_embedding_input_output('f%d' % i, vocab_size)
        input_list.append(input)
        out_list.append(out)
    x = merge(out_list, mode='concat')

    lstm_out = LSTM(HIDDEN_SIZE, return_sequences=True)(x)
    lstm_share = GRU(HIDDEN_SIZE, return_sequences=True)

    x = lstm_out
    for i in range(2):
        att = TimeDistributed(Dense(1))(x)
        att = Flatten()(att)
        att = Activation(activation="softmax")(att)
        att = RepeatVector(HIDDEN_SIZE)(att)
        att = Permute((2, 1))(att)

        mer = merge([att, lstm_out], "mul")
        mer = merge([mer, out_list[-1]], 'mul')

        z = merge([lstm_out, mer], 'sum')
        z = lstm_share(z)
        x = z

    hid = AveragePooling1D(pool_length=2)(x)
    hid = Flatten()(hid)
    # hid = merge([hid, out_list[-1]], mode='concat')
    main_loss = Dense(1, activation='sigmoid', name='main_output')(hid)

    model = Model(input=input_list, output=main_loss)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.fit(X_train_list, y_train, batch_size=BATCH_SIZE, nb_epoch=EPOCHS)
    return model
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size),
                         name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size),
                         name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_1_a'))(text_mask_1)
    lstm_1_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_b'))(lstm_1_a)
    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_2_a'))(text_mask_2)
    lstm_2_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_b'))(lstm_2_a)
    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True,
                            name='cosine_similarity')([lstm_1_b, lstm_2_b])

    model = Model(inputs=[text_input_1, text_input_2],
                  outputs=cosine_similarity)
    return model
def create_model(self, n_timesteps=None, batch_size=1, include_pred_layer=True):
    input_layers = []

    seq_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="seq_input_layer")
    input_layers.append(seq_input_layer)

    seq_embedding_layer = Embedding(input_dim=self.lexicon_size + 1,
                                    output_dim=self.n_embedding_nodes, mask_zero=True,
                                    name='seq_embedding_layer')(seq_input_layer)

    for layer_num in range(self.n_hidden_layers):
        if layer_num == 0:
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True,
                                   stateful=True, name='seq_hidden_layer1')(seq_embedding_layer)
        else:  # add extra hidden layers
            seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True,
                                   stateful=True, name='seq_hidden_layer' + str(layer_num + 1))(seq_hidden_layer)

    if self.use_pos:
        pos_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="pos_input_layer")
        input_layers.append(pos_input_layer)

        pos_embedding_layer = Embedding(input_dim=self.n_pos_tags + 1,
                                        output_dim=self.n_pos_embedding_nodes, mask_zero=True,
                                        name='pos_embedding_layer')(pos_input_layer)
        pos_hidden_layer = GRU(output_dim=self.n_pos_nodes, return_sequences=True,
                               stateful=True, name='pos_hidden_layer')(pos_embedding_layer)
        seq_hidden_layer = merge([seq_hidden_layer, pos_hidden_layer], mode='concat',
                                 concat_axis=-1, name='pos_merge_hidden_layer')

    if self.use_features:
        feature_input_layer = Input(batch_shape=(batch_size, self.lexicon_size + 1), name="feature_input_layer")
        input_layers.append(feature_input_layer)
        feature_hidden_layer = Dense(output_dim=self.n_feature_nodes, activation='sigmoid',
                                     name='feature_hidden_layer')(feature_input_layer)
        feature_hidden_layer = RepeatVector(n_timesteps)(feature_hidden_layer)
        seq_hidden_layer = merge([seq_hidden_layer, feature_hidden_layer], mode='concat',
                                 concat_axis=-1, name='feature_merge_hidden_layer')

    output_layers = []
    if include_pred_layer:
        pred_layer = TimeDistributed(Dense(self.lexicon_size + 1, activation="softmax",
                                           name='pred_layer'))(seq_hidden_layer)
        output_layers.append(pred_layer)
        if self.use_pos:
            pred_pos_layer = TimeDistributed(Dense(self.n_pos_tags + 1, activation="softmax",
                                                   name='pred_pos_layer'))(seq_hidden_layer)
            output_layers.append(pred_pos_layer)

    model = Model(input=input_layers, output=output_layers)

    # select optimizer and compile
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=eval(self.optimizer)(clipvalue=self.clipvalue, lr=self.lr, decay=self.decay))
    return model