def closest_docs(self, query, k=1):
    """Closest docs by dot product between query and documents
    in tfidf weighted word vector space.

    Args:
        query: Passed unchanged to ``self.text2spvec`` to build the query
            vector.
        k: Maximum number of documents to return. A value of zero or
            less yields an empty result.

    Returns:
        A ``(doc_ids, doc_scores)`` pair ordered by descending score.
        ``doc_ids`` is a list built with ``self.get_doc_id`` and
        ``doc_scores`` is the matching slice of the product's stored
        values. Fewer than ``k`` entries are returned when the product
        has fewer stored (non-zero) entries.
    """
    spvec = self.text2spvec(query)
    # NOTE(review): assumes both operands are scipy sparse matrices so that
    # `*` is a matrix product and the result exposes `.data` / `.indices`
    # for a single query row -- confirm against text2spvec / doc_mat.
    res = spvec * self.doc_mat
    scores = res.data

    if k <= 0:
        # A negative k would be read by argpartition/slicing as an offset
        # from the end and return the wrong documents (or raise on an empty
        # score vector), so short-circuit to "no documents".
        order = np.empty(0, dtype=np.intp)
    elif len(scores) <= k:
        # Fewer candidates than requested: rank all of them.
        order = np.argsort(-scores)
    else:
        # Select the k best without sorting everything, then order only
        # those k by descending score.
        top = np.argpartition(-scores, k)[:k]
        order = top[np.argsort(-scores[top])]

    doc_scores = scores[order]
    doc_ids = [self.get_doc_id(i) for i in res.indices[order]]
    return doc_ids, doc_scores
评论列表
文章目录