def create_joint_model(input_dim, init_w, init_b, gamma, weight_hinge, learning_rate, decay, regulariser=None):
image_input = Input(shape=(input_dim,), dtype='float32', name='image_input')
db_input = Input(shape=(input_dim,), dtype='float32', name="db_input")
shared_layer = Dense(1, input_dim=input_dim, kernel_regularizer=regulariser, kernel_initializer='uniform',
activation="linear", use_bias=True, name='shared_layer')
_ = shared_layer(image_input)
_ = shared_layer(db_input)
model = Model(inputs=[image_input, db_input], outputs=[shared_layer.get_output_at(0), shared_layer.get_output_at(1)])
    adam = Adam(lr=learning_rate)  # SGD should also work because the loss is convex, but Adam converges faster.
model.compile(optimizer=adam, loss=['hinge', 'mse'], loss_weights=[weight_hinge, gamma],
metrics=[my_accuracy, 'mse'])
return model
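# Usage sketch (not from the original source): all dimensions and hyperparameter
# values below are illustrative assumptions, and the custom my_accuracy metric
# must be defined in the surrounding module. Two aligned feature batches go
# through the same shared linear layer; output 1 trains with a hinge loss
# (targets in {-1, +1}) and output 2 with mean squared error.
import numpy as np

image_features = np.random.rand(32, 4096).astype('float32')
db_features = np.random.rand(32, 4096).astype('float32')
hinge_targets = np.sign(np.random.rand(32, 1) - 0.5)   # +/-1 labels for the hinge output
mse_targets = np.random.rand(32, 1).astype('float32')

joint = create_joint_model(input_dim=4096, init_w=None, init_b=None,
                           gamma=0.1, weight_hinge=1.0,
                           learning_rate=1e-3, decay=0.0)
joint.fit([image_features, db_features], [hinge_targets, mse_targets],
          batch_size=8, epochs=2)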
Python Input() class: example source code
def build_lstm(output_dim, embeddings):
loss_function = "categorical_crossentropy"
# this is the placeholder tensor for the input sequences
sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype="int32")
# this embedding layer will transform the sequences of integers
embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=MAX_SEQUENCE_LENGTH, weights=[embeddings], trainable=True)(sequence)
    # 4 convolution layers (1000 filters each) with filter lengths 2, 3, 5 and 7
    cnns = [Convolution1D(filter_length=filter_length, nb_filter=1000, border_mode="same")
            for filter_length in [2, 3, 5, 7]]
    # concatenate the outputs of all convolution layers
    merged_cnn = merge([cnn(embedded) for cnn in cnns], mode="concat")
    # create the attention vector by max-pooling the convolution outputs over time
maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
attention_vector = maxpool(merged_cnn)
forwards = AttentionLSTM(64, attention_vector)(embedded)
backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)
# concatenate the outputs of the 2 LSTM layers
bi_lstm = merge([forwards, backwards], mode="concat", concat_axis=-1)
after_dropout = Dropout(0.5)(bi_lstm)
# softmax output layer
output = Dense(output_dim=output_dim, activation="softmax")(after_dropout)
    # the complete model
model = Model(input=sequence, output=output)
# try using different optimizers and different optimizer configs
model.compile("adagrad", loss_function, metrics=["accuracy"])
return model
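# Usage sketch (not from the original source): the vocabulary size, embedding
# dimension and class count below are assumptions, and MAX_SEQUENCE_LENGTH plus
# the custom AttentionLSTM layer come from the surrounding module.
import numpy as np

pretrained_embeddings = np.random.rand(20000, 300).astype('float32')   # (vocab_size, embedding_dim)
classifier_model = build_lstm(output_dim=3, embeddings=pretrained_embeddings)
# token_ids: integer matrix of shape (n_samples, MAX_SEQUENCE_LENGTH)
# labels:    one-hot matrix of shape (n_samples, 3) for categorical_crossentropy
# classifier_model.fit(token_ids, labels, nb_epoch=5, batch_size=32)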
def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth, nb_stacks, use_skip_connections,
learn_all_outputs, _log, desired_sample_rate, use_bias, res_l2, final_l2):
def residual_block(x):
original_x = x
        # TODO: initialization, regularization?
# Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet.
tanh_out = CausalAtrousConvolution1D(nb_filters, 2, atrous_rate=2 ** i, border_mode='valid', causal=True,
bias=use_bias,
name='dilated_conv_%d_tanh_s%d' % (2 ** i, s), activation='tanh',
W_regularizer=l2(res_l2))(x)
sigm_out = CausalAtrousConvolution1D(nb_filters, 2, atrous_rate=2 ** i, border_mode='valid', causal=True,
bias=use_bias,
name='dilated_conv_%d_sigm_s%d' % (2 ** i, s), activation='sigmoid',
W_regularizer=l2(res_l2))(x)
x = layers.Merge(mode='mul', name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out])
res_x = layers.Convolution1D(nb_filters, 1, border_mode='same', bias=use_bias,
W_regularizer=l2(res_l2))(x)
skip_x = layers.Convolution1D(nb_filters, 1, border_mode='same', bias=use_bias,
W_regularizer=l2(res_l2))(x)
res_x = layers.Merge(mode='sum')([original_x, res_x])
return res_x, skip_x
input = Input(shape=(fragment_length, nb_output_bins), name='input_part')
out = input
skip_connections = []
out = CausalAtrousConvolution1D(nb_filters, 2, atrous_rate=1, border_mode='valid', causal=True,
name='initial_causal_conv')(out)
for s in range(nb_stacks):
for i in range(0, dilation_depth + 1):
out, skip_out = residual_block(out)
skip_connections.append(skip_out)
if use_skip_connections:
out = layers.Merge(mode='sum')(skip_connections)
out = layers.Activation('relu')(out)
out = layers.Convolution1D(nb_output_bins, 1, border_mode='same',
W_regularizer=l2(final_l2))(out)
out = layers.Activation('relu')(out)
out = layers.Convolution1D(nb_output_bins, 1, border_mode='same')(out)
if not learn_all_outputs:
        raise DeprecationWarning('Learning only the final output is wasteful; the model now learns on all outputs inside the receptive field.')
out = layers.Lambda(lambda x: x[:, -1, :], output_shape=(out._keras_shape[-1],))(
out) # Based on gif in deepmind blog: take last output?
out = layers.Activation('softmax', name="output_softmax")(out)
model = Model(input, out)
receptive_field, receptive_field_ms = compute_receptive_field()
_log.info('Receptive Field: %d (%dms)' % (receptive_field, int(receptive_field_ms)))
return model
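# Hypothetical stand-in for the compute_receptive_field() helper called above;
# the real implementation lives elsewhere in the project and takes no arguments
# (the config values are captured by the experiment framework). This standalone
# sketch assumes the dilation pattern built above: filter length 2 with dilation
# rates 1, 2, ..., 2**dilation_depth, repeated for nb_stacks stacks.
def compute_receptive_field_sketch(desired_sample_rate, dilation_depth, nb_stacks):
    # One dilation cycle spans roughly 2**(dilation_depth + 1) samples, and
    # consecutive stacks overlap by a single sample.
    receptive_field = nb_stacks * (2 ** dilation_depth) * 2 - (nb_stacks - 1)
    receptive_field_ms = (receptive_field * 1000.0) / desired_sample_rate
    return receptive_field, receptive_field_ms
# Example: dilation_depth=9, nb_stacks=1 at 4000 Hz gives 1024 samples (256 ms).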
def get_model(
data_path, #Path to dataset
hid_dim, #Dimension of the hidden GRU layers
optimizer='rmsprop', #Optimization function to be used
loss='categorical_crossentropy' #Loss function to be used
):
metadata_dict = {}
f = open(os.path.join(data_path, 'metadata', 'metadata.txt'), 'r')
for line in f:
entry = line.split(':')
metadata_dict[entry[0]] = int(entry[1])
f.close()
story_maxlen = metadata_dict['input_length']
query_maxlen = metadata_dict['query_length']
vocab_size = metadata_dict['vocab_size']
entity_dim = metadata_dict['entity_dim']
embed_weights = np.load(os.path.join(data_path, 'metadata', 'weights.npy'))
word_dim = embed_weights.shape[1]
########## MODEL ############
story_input = Input(shape=(story_maxlen,), dtype='int32', name="StoryInput")
x = Embedding(input_dim=vocab_size+2,
output_dim=word_dim,
input_length=story_maxlen,
mask_zero=True,
weights=[embed_weights])(story_input)
query_input = Input(shape=(query_maxlen,), dtype='int32', name='QueryInput')
x_q = Embedding(input_dim=vocab_size+2,
output_dim=word_dim,
input_length=query_maxlen,
mask_zero=True,
weights=[embed_weights])(query_input)
concat_embeddings = masked_concat([x_q, x], concat_axis=1)
    gru = GRU(hid_dim, consume_less='gpu')(concat_embeddings)
    reverse_gru = GRU(hid_dim, consume_less='gpu', go_backwards=True)(concat_embeddings)
    merged = merge([gru, reverse_gru], mode='concat')
result = Dense(entity_dim, activation='softmax')(merged)
model = Model(input=[story_input, query_input], output=result)
model.compile(optimizer=optimizer,
loss=loss,
metrics=['accuracy'])
    model.summary()
return model
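# Usage sketch (not from the original source): the loader above expects
# data_path/metadata/metadata.txt with one "key:value" pair per line
# (input_length, query_length, vocab_size, entity_dim) and an embedding matrix
# data_path/metadata/weights.npy of shape (vocab_size + 2, word_dim). The path
# and hidden dimension below are assumptions.
#
# reader = get_model(data_path='data/qa_corpus', hid_dim=128)
# reader.fit([story_matrix, query_matrix], entity_targets, batch_size=32, nb_epoch=10)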
def get_model(
data_path, #Path to dataset
lstm_dim, #Dimension of the hidden LSTM layers
optimizer='rmsprop', #Optimization function to be used
loss='categorical_crossentropy', #Loss function to be used
weights_path=None #If specified initializes model with weight file given
):
metadata_dict = {}
f = open(os.path.join(data_path, 'metadata', 'metadata.txt'), 'r')
for line in f:
entry = line.split(':')
metadata_dict[entry[0]] = int(entry[1])
f.close()
story_maxlen = metadata_dict['input_length']
query_maxlen = metadata_dict['query_length']
vocab_size = metadata_dict['vocab_size']
entity_dim = metadata_dict['entity_dim']
embed_weights = np.load(os.path.join(data_path, 'metadata', 'weights.npy'))
word_dim = embed_weights.shape[1]
########## MODEL ############
story_input = Input(shape=(story_maxlen,), dtype='int32', name="StoryInput")
x = Embedding(input_dim=vocab_size+2,
output_dim=word_dim,
input_length=story_maxlen,
mask_zero=True,
weights=[embed_weights])(story_input)
query_input = Input(shape=(query_maxlen,), dtype='int32', name='QueryInput')
x_q = Embedding(input_dim=vocab_size+2,
output_dim=word_dim,
input_length=query_maxlen,
mask_zero=True,
weights=[embed_weights])(query_input)
concat_embeddings = masked_concat([x_q, x], concat_axis=1)
lstm = LSTM(lstm_dim, consume_less='gpu')(concat_embeddings)
reverse_lstm = LSTM(lstm_dim, consume_less='gpu', go_backwards=True)(concat_embeddings)
merged = merge([lstm, reverse_lstm], mode='concat')
result = Dense(entity_dim, activation='softmax')(merged)
model = Model(input=[story_input, query_input], output=result)
if weights_path:
model.load_weights(weights_path)
model.compile(optimizer=optimizer,
loss=loss,
metrics=['accuracy'])
    model.summary()
return model
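# Usage sketch (not from the original source): identical to the GRU variant above
# except that passing weights_path restores previously saved weights before the
# model is compiled. The paths below are assumptions.
#
# reader = get_model(data_path='data/qa_corpus', lstm_dim=128,
#                    weights_path='checkpoints/lstm_reader.h5')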
def create(cls, classes, maximum_tokens, embedding_size, lstm_units, dropout, bidirectional):
"""
Create a model that labels semantic relationships between text pairs.
The text pairs are passed in as two aligned matrices of size
(batch size, maximum embedding tokens, embedding size). They are generated by TextPairEmbeddingGenerator.
:param classes: the number of distinct classes to categorize
:type classes: int
:param maximum_tokens: maximum number of embedded tokens
:type maximum_tokens: int
:param embedding_size: size of the embedding vector
:type embedding_size: int
:param lstm_units: number of hidden units in the shared LSTM
:type lstm_units: int
:param dropout: dropout rate or None for no dropout
:type dropout: float or None
:param bidirectional: should the shared LSTM be bidirectional?
:type bidirectional: bool
:return: the created model
:rtype: TextPairClassifier
"""
# Create the model geometry.
input_shape = (maximum_tokens, embedding_size)
# Input two sets of aligned text pairs.
input_1 = Input(input_shape)
input_2 = Input(input_shape)
# Apply the same LSTM to each.
if bidirectional:
lstm = Bidirectional(LSTM(lstm_units), name="lstm")
else:
lstm = LSTM(lstm_units, name="lstm")
r1 = lstm(input_1)
r2 = lstm(input_2)
        # Concatenate the two LSTM encodings with their element-wise product and squared difference.
p = multiply([r1, r2])
negative_r2 = Lambda(lambda x: -x)(r2)
d = add([r1, negative_r2])
q = multiply([d, d])
v = [r1, r2, p, q]
lstm_output = concatenate(v)
if dropout is not None:
lstm_output = Dropout(dropout, name="dropout")(lstm_output)
        # A single-layer perceptron maps the concatenated vector to the labels. Its number of hidden
        # units equals the square root of the length of the concatenated vector.
m = sum(t.shape[1].value for t in v)
perceptron = Dense(math.floor(math.sqrt(m)), activation="relu")(lstm_output)
logistic_regression = Dense(classes, activation="softmax", name="softmax")(perceptron)
model = Model([input_1, input_2], logistic_regression, "Text pair classifier")
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
return cls(model)
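# Usage sketch (not from the original source): the class count and dimensions
# below are assumptions. Both inputs are aligned embedded-token matrices of shape
# (batch, maximum_tokens, embedding_size), e.g. from TextPairEmbeddingGenerator,
# and the targets are integer class indices (sparse_categorical_crossentropy).
classifier = TextPairClassifier.create(classes=3, maximum_tokens=50,
                                       embedding_size=300, lstm_units=128,
                                       dropout=0.5, bidirectional=True)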