def VQA_MODEL():
    """Build the two-branch (question + image) classifier and return it.

    The network is assembled from three ``Sequential`` pieces:

    * an image branch that takes a flat vector of ``image_feature_size``
      values and passes it through unchanged,
    * a language branch made of ``number_of_LSTM`` stacked LSTM layers fed
      with sequences of shape ``(max_length_questions, word_feature_size)``,
    * a combined model that concatenates both branch outputs, applies
      ``number_of_dense_layers`` blocks of Dense -> tanh -> Dropout, and
      ends with a 1000-unit softmax layer.

    All hyper-parameters are fixed inside the function. The model is
    returned without ``compile`` being called, so the caller chooses the
    loss and optimizer.

    NOTE(review): ``Merge`` and the ``init=`` keyword look like the Keras 1.x
    API; confirm the installed Keras version before reusing this code.

    Returns:
        The combined ``Sequential`` model.
    """
    image_feature_size = 4096
    word_feature_size = 300
    number_of_LSTM = 3
    number_of_hidden_units_LSTM = 512
    max_length_questions = 30
    number_of_dense_layers = 3
    number_of_hidden_units = 1024
    activation_function = 'tanh'
    dropout_pct = 0.5

    # Image model: the target shape equals the input shape, so this Reshape
    # changes nothing and only serves as the branch's input layer.
    model_image = Sequential()
    model_image.add(Reshape((image_feature_size,), input_shape=(image_feature_size,)))

    # Language model: a stack of LSTMs. Every layer except the last returns
    # the full sequence so the next LSTM has a sequence to consume; the last
    # one returns only its final output so it can be concatenated with the
    # flat image vector. Only the first layer declares the input shape.
    model_language = Sequential()
    for layer_index in range(number_of_LSTM):
        lstm_kwargs = {'return_sequences': layer_index < number_of_LSTM - 1}
        if layer_index == 0:
            lstm_kwargs['input_shape'] = (max_length_questions, word_feature_size)
        model_language.add(LSTM(number_of_hidden_units_LSTM, **lstm_kwargs))

    # Combined model: concatenate the two branch outputs along axis 1.
    model = Sequential()
    model.add(Merge([model_language, model_image], mode='concat', concat_axis=1))

    # `range` rather than `xrange` so the loop also runs on Python 3.
    for _ in range(number_of_dense_layers):
        model.add(Dense(number_of_hidden_units, init='uniform'))
        model.add(Activation(activation_function))
        model.add(Dropout(dropout_pct))

    # Output layer: softmax over 1000 classes (presumably the candidate
    # answers -- confirm against the training code).
    model.add(Dense(1000))
    model.add(Activation('softmax'))
    return model
评论列表
文章目录