knn_retrieval.py 文件源码-python代码片段

knn_retrieval.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

def get_chunk_nns(self, X, q_centroids, question_details, chunk):
        """Find the nearest neighbours of every query centroid inside one chunk.

        A brute-force, cosine-distance ``NearestNeighbors`` index is fitted on
        ``X`` and queried with ``q_centroids``. Each neighbour's row index is
        offset by ``chunk[0]`` and translated through ``self.idmap``.

        Args:
            X: Vectors to search, one per row.
            q_centroids: Query vectors, one per row.
            question_details: Indexable by query position; items ``[0]`` and
                ``[1]`` of each entry are forwarded to ``Question`` unchanged.
            chunk: Indexable whose first element is added to every neighbour
                row index before the ``self.idmap`` lookup (presumably the
                chunk's start offset in the full dataset -- confirm with the
                caller).

        Returns:
            A list with one ``Question`` per row of ``q_centroids``, each built
            from the two detail fields, the mapped neighbour ids and the
            matching list of distances. At most 1000 neighbours are returned
            per query; fewer when ``X`` has fewer than 1000 rows.
        """
        # kneighbors() raises ValueError when asked for more neighbours than
        # there are fitted samples, so clamp the request to the chunk size
        # (a short final chunk would otherwise crash the whole retrieval).
        n_samples = X.shape[0] if hasattr(X, 'shape') else len(X)
        n_neighbors = min(1000, n_samples)

        nbrs = NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=n_neighbors).fit(X)
        dist, nns = nbrs.kneighbors(q_centroids, return_distance=True)

        offset = chunk[0]
        q_array = []
        for q_point, neighbour_rows in enumerate(nns):
            # Translate chunk-local row indices into ids via the id map.
            doc_nns = [self.idmap[offset + row] for row in neighbour_rows]
            details = question_details[q_point]
            q_array.append(Question(details[0], details[1], doc_nns, list(dist[q_point, :])))
        return q_array

    # Dataset indices are split into N chunks. The top-(N*k) nearest neighbors are extracted from each chunk, and then
    # the final top-k neighbors are extracted from those.