base.py 文件源码-python代码片段

def query(self, query, k=None, matched_indices=None):
        # matching step
        matching_ind = self._matching(query)
        # print(matching_ind, file=sys.stderr)
        Xm, matched_doc_ids = self._X[matching_ind], self._y[matching_ind]
        # matching_docs, matching_doc_ids = self._matching(query)
        # calculate elements to retrieve
        n_ret = len(matching_ind)
        if n_ret == 0:
            return []
        if self.verbose > 0:
            print("Found {} matches:".format(n_ret))
        # n_ret = min(n_ret, k) if k > 0 else n_ret
        # model dependent transformation
        xq = self._cv.transform([query])
        q = self.tfidf.transform(xq)
        # Xm = self.vectorizer.transform(matching_docs)
        # model dependent nearest neighbor search or scoring or whatever
        nn = NearestNeighbors(metric='cosine', algorithm='brute').fit(Xm)
        # abuse kneighbors in this case
        # AS q only contains one element, we only need its results.
        if k is not None and k < n_ret:
            n_ret = k

        ind = nn.kneighbors(q,  # q contains a single element
                            n_neighbors=n_ret,  # limit to k neighbors
                            return_distance=False)[0]  # so we only need 1 res
        # dont forget to convert the indices to document ids of matching
        labels = matched_doc_ids[ind]
        return labels