def sort_by_tfidf(question, paragraphs):
    """Rank ``paragraphs`` by TF-IDF cosine similarity to ``question``.

    A TF-IDF vocabulary is fit on ``paragraphs`` only; the question is then
    projected into that same feature space and compared to every paragraph.

    Args:
        question: The query text.
        paragraphs: Sequence of paragraph texts to rank.

    Returns:
        A list of ``(index, score)`` pairs, one per paragraph, where ``index``
        is the paragraph's position in ``paragraphs`` and ``score`` is
        ``1.0 - cosine_distance``. Pairs are ordered from the smallest
        distance to the largest; paragraphs with equal distance keep their
        original relative order. If vectorization raises ``ValueError``,
        every paragraph is returned in its original order with score ``0.0``.
    """
    tfidf = TfidfVectorizer(strip_accents="unicode", stop_words=spacy.en.STOP_WORDS, decode_error='replace')
    try:
        para_features = tfidf.fit_transform(paragraphs)
        q_features = tfidf.transform([question])
    except ValueError:
        # Vectorization failed (presumably an empty vocabulary, e.g. no
        # paragraphs or only stop words -- confirm against scikit-learn);
        # fall back to the input order with a neutral score.
        return [(i, 0.0) for i in range(len(paragraphs))]
    dists = pairwise_distances(q_features, para_features, metric="cosine").ravel()
    # In case of ties, use the earlier paragraph. Mergesort is NumPy's stable
    # sort, so equal distances keep ascending index order. (Using the
    # paragraph texts as a secondary lexsort key would instead break ties
    # alphabetically and copy every paragraph into a NumPy string array.)
    sorted_ix = np.argsort(dists, kind="mergesort")
    return [(i, 1.0 - dists[i]) for i in sorted_ix]
评论列表
文章目录