def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = GCNN(hidden_dim, window_size=window_size,
                 name='gcnn{}'.format(i + 1))(y)
    y = GCNN(hidden_dim, window_size=window_size,
             name='gcnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
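# Hedged usage sketch (not part of the original source): assuming the custom GCNN layer used
# above is importable from the same repo, the language model can be built and compiled like
# this. Targets are next-token ids, so sparse categorical cross-entropy is a natural choice;
# all hyperparameter values below are illustrative.
lm = LM(batch_size=32, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=2)
lm.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
lm.summary()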
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(i + 1))(y)
    y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(i + 1))(y)
    y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(i + 1))(y)
    y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def learnable_wiq(context, question, question_mask, layer_dim):
    """Aligned question embedding. Same as in DRQA paper."""
    question_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(question)
    context_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(context)
    question_enc = Lambda(lambda q: tf.transpose(q, [0, 2, 1]))(question_enc)
    matrix = Lambda(lambda q: tf.matmul(q[0], q[1]))([context_enc, question_enc])
    coefs = Lambda(lambda q: masked_softmax(q[0], q[1], axis=2, expand=1))([matrix, question_mask])
    aligned_question_enc = Lambda(lambda q: tf.matmul(q[0], q[1]))([coefs, question])
    return aligned_question_enc
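# Hedged usage sketch (not part of the original source): learnable_wiq expects 3-D context
# and question encodings plus a 2-D question mask; masked_softmax is a helper defined
# elsewhere in this module. All shapes below are illustrative only.
context_in = Input(shape=(300, 128))    # (batch, context_len, dim)
question_in = Input(shape=(30, 128))    # (batch, question_len, dim)
question_mask_in = Input(shape=(30,))   # (batch, question_len)
aligned_q = learnable_wiq(context_in, question_in, question_mask_in, layer_dim=128)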
def projection(encoding, W, dropout_rate):
    """Projection layer. Dense layer from keras.
    In FastQA it is applied after the encoder, to project context and question representations
    into different spaces."""
    proj = TimeDistributed(
        Dense(W,
              trainable=True,
              weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(encoding)
    proj = Dropout(rate=dropout_rate)(proj)
    return proj
def question_attn_vector(question_encoding, question_mask, context_encoding, repeat=True):
    """Attention over question."""
    question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
    # apply masking
    question_attention_vector = Lambda(lambda q: masked_softmax(q[0], q[1]))([question_attention_vector, question_mask])
    # apply the attention
    question_attention_vector = Lambda(lambda q: q[0] * q[1])([question_encoding, question_attention_vector])
    question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
    if repeat:
        question_attention_vector = Lambda(lambda q: repeat_vector(q[0], q[1]))([question_attention_vector, context_encoding])
    return question_attention_vector
def answer_end_pred(context_encoding, question_attention_vector, context_mask, answer_start_distribution, W, dropout_rate):
    """Answer end prediction layer."""
    # Answer end prediction depends on the start prediction
    def s_answer_feature(x):
        maxind = K.argmax(
            x,
            axis=1,
        )
        return maxind

    x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start_distribution)
    start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
        [tf.range(K.tf.shape(arg[1])[0]), tf.cast(arg[1], K.tf.int32)], axis=1)))([context_encoding, x])
    start_feature = Lambda(lambda q: repeat_vector(q[0], q[1]))([start_feature, context_encoding])

    # Answer end prediction
    answer_end = Lambda(lambda arg: concatenate([
        arg[0],
        arg[1],
        arg[2],
        multiply([arg[0], arg[1]]),
        multiply([arg[0], arg[2]])
    ]))([context_encoding, question_attention_vector, start_feature])

    answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
    answer_end = Dropout(rate=dropout_rate)(answer_end)
    answer_end = TimeDistributed(Dense(1))(answer_end)

    # apply masking
    answer_end = Lambda(lambda q: masked_softmax(q[0], q[1]))([answer_end, context_mask])
    answer_end = Lambda(lambda q: flatten(q))(answer_end)
    return answer_end
def conv_unit(inp, n_gram, no_word=200, window=2):
    out = Conv1D(no_word, window, strides=1, padding="valid", activation='relu')(inp)
    out = TimeDistributed(Dense(5, input_shape=(n_gram, no_word)))(out)
    out = ZeroPadding1D(padding=(0, window-1))(out)
    return out
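# Hedged usage sketch (not part of the original source): conv_unit is typically applied
# several times with different window sizes and the results concatenated along the feature
# axis; the ZeroPadding1D call above restores the original sequence length so the branches
# line up. concatenate is keras.layers.concatenate (already used earlier in this file);
# all shapes and values below are illustrative.
char_input = Input(shape=(21, 64))   # (n_gram, char embedding dim)
a1 = conv_unit(char_input, n_gram=21, no_word=200, window=1)
a2 = conv_unit(char_input, n_gram=21, no_word=200, window=2)
a3 = conv_unit(char_input, n_gram=21, no_word=200, window=3)
merged = concatenate([a1, a2, a3], axis=-1)   # (batch, 21, 15)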
def __init__(self, num_classes, token_index, max_sents, max_tokens,
             embedding_type='glove.6B.100d', embedding_dims=100):
    """Creates a `SentenceModelFactory` instance for building various models that operate over
    (samples, max_sentences, max_tokens) input.

    Args:
        num_classes: The number of output classes.
        token_index: The dictionary of token and its corresponding integer index value.
        max_sents: The max number of sentences in a document.
        max_tokens: The max number of tokens in a sentence.
        embedding_type: The embedding type to use. Set to None to use random embeddings.
            (Default value: 'glove.6B.100d')
        embedding_dims: The number of embedding dims to use for representing a word. This argument will be ignored
            when `embedding_type` is set. (Default value: 100)
    """
    self.num_classes = num_classes
    self.token_index = token_index
    self.max_sents = max_sents
    self.max_tokens = max_tokens

    # This is required to make TimeDistributed(word_encoder_model) work.
    # TODO: Get rid of this restriction when https://github.com/fchollet/keras/issues/6917 resolves.
    if self.max_tokens is None:
        raise ValueError('`max_tokens` should be provided.')

    if embedding_type is not None:
        self.embeddings_index = get_embeddings_index(embedding_type)
        self.embedding_dims = next(iter(self.embeddings_index.values())).shape[-1]
    else:
        self.embeddings_index = None
        self.embedding_dims = embedding_dims
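# Hedged usage sketch (not part of the original source): instantiating the factory described
# in the docstring above. The import path of SentenceModelFactory is not shown here;
# token_index would normally come from the library's tokenizer, so the toy index below and
# embedding_type=None (random embeddings) are illustrative only.
toy_token_index = {'the': 1, 'cat': 2, 'sat': 3}
factory = SentenceModelFactory(num_classes=2,
                               token_index=toy_token_index,
                               max_sents=10,
                               max_tokens=50,
                               embedding_type=None,
                               embedding_dims=100)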
def test_sequential_model_saving():
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')

    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    new_model = load_model(fname)
    os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)

    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def build_hcnn_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False):
    """Hierarchical CNN: a sentence-level CNN with in-sentence average pooling,
    followed by a document-level CNN over sentence representations and a sigmoid score."""
    N = maxnum
    L = maxlen

    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, filter2_len = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.filter2_len, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(Convolution1D(opts.nbfilters, opts.filter1_len, border_mode='valid'), name='z')(resh_W)
    avg_z = TimeDistributed(AveragePooling1D(pool_length=L-opts.filter1_len+1), name='avg_z')(z)  # shape = (N, 1, nbfilters)
    resh_z = Reshape((N, opts.nbfilters), name='resh_z')(avg_z)  # shape = (N, nbfilters)

    hz = Convolution1D(opts.nbfilters, opts.filter2_len, border_mode='valid', name='hz')(resh_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
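# Hedged usage sketch (not part of the original source): the build_* functions here read
# their hyperparameters from an `opts` object, so any namespace exposing the attributes they
# access (nbfilters, filter1_len, filter2_len, lstm_units, dropout, l2_value, init_bias)
# works. All values below are illustrative, and the module-level logger used above is
# assumed to be configured.
from argparse import Namespace
opts = Namespace(nbfilters=100, filter1_len=5, filter2_len=3, lstm_units=100,
                 dropout=0.5, l2_value=0.001, init_bias=False)
hcnn = build_hcnn_model(opts, vocab_size=4000, maxnum=50, maxlen=50, embedd_dim=50)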
def build_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # TODO, random drop sentences
    drop_hz = Dropout(opts.dropout, name='drop_hz')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(drop_hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_bidirectional_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build bidirectional model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z_fwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z_fwd')(resh_W)
    z_bwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True, go_backwards=True), name='z_bwd')(resh_W)
    z_merged = merge([z_fwd, z_bwd], mode='concat', name='z_merged')

    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z_merged)

    hz_fwd = LSTM(opts.lstm_units, return_sequences=True, name='hz_fwd')(avg_z)
    hz_bwd = LSTM(opts.lstm_units, return_sequences=True, go_backwards=True, name='hz_bwd')(avg_z)
    hz_merged = merge([hz_fwd, hz_bwd], mode='concat', name='hz_merged')
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz_merged)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    att_z = TimeDistributed(Attention(name='att_z'))(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(att_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def __init__(self, rnn_dim, rnn_unit='gru', input_shape=(0,),
             dropout=0.0, highway=False, return_sequences=False,
             dense_dim=0):
    if rnn_unit == 'gru':
        rnn = GRU
    else:
        rnn = LSTM
    self.model = Sequential()
    self.model.add(
        Bidirectional(rnn(rnn_dim,
                          dropout=dropout,
                          recurrent_dropout=dropout,
                          return_sequences=return_sequences),
                      input_shape=input_shape))
    # self.model.add(rnn(rnn_dim,
    #                    dropout=dropout,
    #                    recurrent_dropout=dropout,
    #                    return_sequences=return_sequences,
    #                    input_shape=input_shape))
    if highway:
        if return_sequences:
            self.model.add(TimeDistributed(Highway(activation='tanh')))
        else:
            self.model.add(Highway(activation='tanh'))
    if dense_dim > 0:
        self.model.add(TimeDistributed(Dense(dense_dim,
                                             activation='relu')))
        self.model.add(TimeDistributed(Dropout(dropout)))
        self.model.add(TimeDistributed(BatchNormalization()))
def __init__(self, dense_dim, sequence_length=0,
             input_dim=0, dropout=0.0):
    self.dense_dim = dense_dim
    self.sequence_length = sequence_length
    self.input_dim = input_dim
    model = Sequential()
    model.add(Dense(dense_dim,
                    activation='relu',
                    input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model)
def __init__(self, dense_dim, sequence_length=0,
             input_dim=0, dropout=0.0):
    model = Sequential()
    model.add(Dense(dense_dim,
                    activation='relu',
                    input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model, input_shape=(sequence_length, input_dim,))
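# Hedged usage sketch (not part of the original source): the two constructors above wrap a
# small Sequential dense block in TimeDistributed, so the same block is applied independently
# at every timestep of a 3-D input. All shapes below are illustrative.
seq_input = Input(shape=(40, 300))                 # (timesteps, input_dim)
dense_block = Sequential()
dense_block.add(Dense(128, activation='relu', input_shape=(300,)))
dense_block.add(Dropout(0.2))
dense_block.add(BatchNormalization())
per_step_output = TimeDistributed(dense_block)(seq_input)   # (batch, 40, 128)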