def find_similar_words(wordvecs):
    """Interactively print the nearest neighbours of words in embedding space.

    The cosine similarity between every pair of rows of ``wordvecs.W[1:]`` is
    computed once up front. The function then repeatedly prompts for a word
    and prints the 10 vocabulary entries with the highest similarity to it,
    most similar first, separated by spaces. Entering ``STOP`` ends the loop.
    A word that is not in the vocabulary is reported and the prompt is shown
    again.

    The queried word is not filtered out of its own result list.

    Args:
        wordvecs: Object exposing ``W`` (the embedding matrix; row 0 is
            excluded from the similarity computation, so the vector of the
            word with index ``i`` is row ``i`` of ``W``) and ``word_idx_map``
            (a mapping from word to its index).
            NOTE(review): row 0 is presumably a padding/unknown vector and
            real word indices presumably start at 1 -- confirm against the
            code that builds ``wordvecs``.

    Returns:
        None. All results are written to standard output.
    """
    from sklearn.metrics import pairwise_distances

    # Python 2 spells the line reader raw_input(); Python 3 calls it input().
    try:
        read_line = raw_input  # noqa: F821 -- only defined on Python 2
    except NameError:
        read_line = input

    # Cosine similarity is 1 - cosine distance. This is a dense
    # (n_words x n_words) matrix built once so each query is a row lookup.
    pairwise_sim_mat = 1 - pairwise_distances(wordvecs.W[1:], metric='cosine')

    # Invert word -> index into index -> word; indices must be unique.
    id2word = {}
    for key, value in wordvecs.word_idx_map.items():
        assert value not in id2word
        id2word[value] = key

    while True:
        word = read_line("Enter a word ('STOP' to quit): ")
        if word == 'STOP':
            break
        try:
            w_id = wordvecs.word_idx_map[word]
        except KeyError:
            print('%s not in the vocabulary.' % word)
            # Without this, execution would fall through and use an unbound
            # (or stale, from the previous query) w_id.
            continue
        # Row w_id - 1 because row 0 of W was dropped above. argsort is
        # ascending, so take the last 10 and reverse for most-similar-first.
        sim_w_id = pairwise_sim_mat[w_id - 1].argsort()[-10:][::-1]
        # + 1 maps matrix rows back to word indices.
        print(' '.join(id2word[i + 1] for i in sim_w_id))
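# Comment list  (web-page navigation residue, not part of the program)
# Table of contents  (web-page navigation residue, not part of the program)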