def query(self, query, k=None, matched_indices=None):
    """Return ids of the matched documents, ranked by cosine distance to ``query``.

    ``self._matching(query)`` selects the candidate rows.  Only those rows
    of ``self._X`` are ranked: the query is vectorized with
    ``self._cv.transform`` followed by ``self.tfidf.transform`` and compared
    against the candidates with a brute-force cosine nearest-neighbor search.

    Args:
        query: The query; passed to ``self._matching`` and, wrapped in a
            one-element list, to ``self._cv.transform``.
        k: Upper bound on the number of ids returned.  ``None`` (default),
            or a value that is not smaller than the number of matches,
            returns every match.  ``0`` returns an empty list.
        matched_indices: Not used in this method's body (presumably kept so
            the signature stays compatible with callers; verify).

    Returns:
        ``[]`` when nothing matches or when ``k == 0``.  Otherwise the
        entries of ``self._y`` belonging to the selected candidates, in the
        order reported by ``kneighbors``.

    Raises:
        ValueError: If there are matches and ``k`` is negative.
    """
    # Matching step: which rows are candidates at all?
    matching_ind = self._matching(query)
    n_matched = len(matching_ind)
    if n_matched == 0:
        # Bail out before slicing the matrix or vectorizing the query.
        return []
    if self.verbose > 0:
        print("Found {} matches:".format(n_matched))

    # Decide how many neighbors to ask for.  kneighbors() rejects
    # n_neighbors <= 0, so those cases are settled here.
    if k is None:
        n_ret = n_matched
    elif k < 0:
        raise ValueError(
            "k must be None or a non-negative integer, got {!r}".format(k)
        )
    elif k == 0:
        return []
    else:
        n_ret = min(n_matched, k)

    # Restrict the document matrix and the id array to the candidates.
    Xm, matched_doc_ids = self._X[matching_ind], self._y[matching_ind]

    # Model-dependent transformation of the query.
    xq = self._cv.transform([query])
    q = self.tfidf.transform(xq)

    # kneighbors() is used purely as a ranking routine over the candidates.
    nn = NearestNeighbors(metric='cosine', algorithm='brute').fit(Xm)
    # q holds exactly one query, so only the first result row is relevant.
    ind = nn.kneighbors(q, n_neighbors=n_ret, return_distance=False)[0]

    # Positions in ``ind`` refer to rows of Xm; map them back to document ids.
    return matched_doc_ids[ind]
# (web page residue) Comment list
# (web page residue) Table of contents