def train_regressor(options, embed_map, wordvecs, worddict):
    """
    Return regressor to map word2vec to RNN word space.

    Fits a ``LinearRegression`` whose inputs are word2vec vectors and whose
    targets are the RNN word vectors, using only the words that occur in
    both vocabularies.

    Parameters
    ----------
    options : mapping
        Reads ``options['n_words']`` (only the first ``n_words - 2`` keys of
        ``worddict`` are considered) and ``options['dim_word']`` (number of
        columns of the target matrix).
    embed_map
        word2vec model; must expose ``vocab.keys()`` and ``embed_map[word]``
        lookup.  The input matrix is allocated with 300 columns, so the
        looked-up vectors are expected to be 300-dimensional.
    wordvecs : mapping
        Word -> RNN word vector; each vector is stored in a row of width
        ``options['dim_word']``.
    worddict : mapping
        Vocabulary of the RNN model.  Its iteration order decides which
        words fall inside the ``n_words - 2`` cut-off.
        NOTE(review): the ``- 2`` presumably skips two reserved vocabulary
        entries -- confirm against the code that builds ``worddict``.

    Returns
    -------
    LinearRegression
        The regressor after ``fit(w2v, sg)``.
    """
    # A plain set gives O(1) membership tests without the side effect of a
    # defaultdict lookup, which inserts a new entry for every missed word.
    w2v_vocab = set(embed_map.vocab.keys())

    # list(...) keeps the slice working on Python 3, where dict.keys() is a
    # view that cannot be sliced; on Python 2 the result is unchanged.
    candidates = list(worddict.keys())[:options['n_words'] - 2]

    # Words known to both models, in worddict order; the list position is
    # the row index used in both matrices below.
    shared = [w for w in candidates if w in w2v_vocab]

    # Get the vectors for all words in 'shared'
    w2v = numpy.zeros((len(shared), 300), dtype='float32')
    sg = numpy.zeros((len(shared), options['dim_word']), dtype='float32')
    for row, w in enumerate(shared):
        w2v[row] = embed_map[w]
        sg[row] = wordvecs[w]

    clf = LinearRegression()
    clf.fit(w2v, sg)
    return clf
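# NOTE: the original paste ended with stray web-page navigation labels
# ("Comment list", "Table of contents"); they are not part of the code.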