# Imports this function relies on; the path_to_* variables and the model
# helpers (init_params, load_params, init_tparams, build_encoder,
# build_encoder_w2v, lookup_table, load_googlenews_vectors) are assumed to be
# defined elsewhere in the same module, as in the skip-thoughts training code.
import cPickle as pkl
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams


def load_model(embed_map=None):
"""
Load all model components + apply vocab expansion
"""
# Load the worddict
print 'Loading dictionary...'
with open(path_to_dictionary, 'rb') as f:
worddict = pkl.load(f)
# Create inverted dictionary
print 'Creating inverted dictionary...'
word_idict = dict()
for kk, vv in worddict.iteritems():
word_idict[vv] = kk
word_idict[0] = '<eos>'
word_idict[1] = 'UNK'
# Load model options
print 'Loading model options...'
with open('%s.pkl'%path_to_model, 'rb') as f:
options = pkl.load(f)
# Load parameters
print 'Loading model parameters...'
params = init_params(options)
params = load_params(path_to_model, params)
tparams = init_tparams(params)
# Extractor functions
print 'Compiling encoder...'
trng = RandomStreams(1234)
trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
f_enc = theano.function([x, x_mask], ctx, name='f_enc')
f_emb = theano.function([x], emb, name='f_emb')
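    # A second encoder is compiled below that takes pre-computed word
    # embeddings (rather than word indices) as input, so sentences containing
    # words outside the training vocabulary can still be encoded after expansion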
trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
# Load word2vec, if applicable
    if embed_map is None:
print 'Loading word2vec embeddings...'
embed_map = load_googlenews_vectors(path_to_word2vec)
# Lookup table using vocab expansion trick
print 'Creating word lookup tables...'
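    # Expansion trick: fit a linear map from the word2vec space to the
    # encoder's learned embedding space on the words the two vocabularies
    # share, then apply it to every word2vec entry so the table also covers
    # words never seen during training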
table = lookup_table(options, embed_map, worddict, word_idict, f_emb)
# Store everything we need in a dictionary
print 'Packing up...'
model = {}
model['options'] = options
model['table'] = table
model['f_w2v'] = f_w2v
return model
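

# Minimal usage sketch. Assumption: this function sits alongside an
# encode(model, X) helper (as in the skip-thoughts reference code) that uses
# model['table'] and model['f_w2v'] to map raw sentences to vectors; adapt
# the names to your module.
if __name__ == '__main__':
    model = load_model()
    sentences = ['the cat sat on the mat .', 'a dog barked .']
    vectors = encode(model, sentences)  # assumed helper from this module
    print 'Sentence vectors:', vectors.shape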