def get_nearby_words(main_words):
    """Find the nearest word-vector neighbors of each word in main_words.

    Reads the word-vector file named by OPTS.wordvec_file.  Each line is
    split on single spaces: the first token is the word (decoded as
    ISO-8859-1) and the remaining tokens are parsed as floats to form its
    vector.  A KD-tree is built over every vector in the file, and each
    word from main_words that has a vector is queried for its
    OPTS.num_neighbors + 1 nearest points.

    A one-line summary of how many of the requested words were found is
    written to stderr.

    Args:
        main_words: Sized collection of words to look up.

    Returns:
        A dict mapping each found word to a list of
        {'word': neighbor_word, 'dist': distance} dicts, in the order the
        tree query returned them.  Words without a vector are omitted.
        Returns an empty dict when none of the words has a vector.
    """
    # Build the lookup set once: the membership test below runs for every
    # line of the vector file, and main_words may be a list.
    main_word_set = set(main_words)
    main_inds = {}
    all_words = []
    all_vecs = []
    with open(OPTS.wordvec_file) as f:
        for i, line in tqdm(enumerate(f)):
            toks = line.rstrip().split(' ')
            word = unicode(toks[0], encoding='ISO-8859-1')
            vec = np.array([float(x) for x in toks[1:]])
            all_words.append(word)
            all_vecs.append(vec)
            if word in main_word_set:
                # If a word occurs on several lines, the last one wins.
                main_inds[word] = i
    num_main = len(main_words)
    # Guard the percentage: an empty main_words used to raise
    # ZeroDivisionError here.
    pct_found = 100.0 * len(main_inds) / num_main if num_main else 0.0
    print >> sys.stderr, 'Found vectors for %d/%d words = %.2f%%' % (
        len(main_inds), num_main, pct_found)
    if not main_inds:
        # Nothing to query, so skip building a tree over the whole
        # vocabulary (or over zero points if the file was empty).
        return {}
    tree = KDTree(all_vecs)
    nearby_words = {}
    for word in tqdm(main_inds):
        # The query vector is itself a point in the tree; the "+ 1" is
        # presumably there so that the word itself does not use up one of
        # the num_neighbors slots.
        dists, inds = tree.query([all_vecs[main_inds[word]]],
                                 k=OPTS.num_neighbors + 1)
        nearby_words[word] = [
            {'word': all_words[i], 'dist': d}
            for d, i in zip(dists[0], inds[0])]
    return nearby_words
find_squad_nearby_words.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录