def get_chunk_nns(self, X, q_centroids, question_details, chunk, n_neighbors=1000):
    """Find the cosine nearest neighbors of each query centroid within one chunk.

    A brute-force ``NearestNeighbors`` index is fitted on ``X`` and queried
    with ``q_centroids``. Every neighbor index returned for the chunk is
    shifted by ``chunk[0]`` and translated through ``self.idmap`` before being
    stored on the resulting ``Question``.

    Args:
        X: Data rows of the current chunk (must expose ``.shape``).
        q_centroids: Query vectors, one row per question.
        question_details: Indexable by question position; items ``[0]`` and
            ``[1]`` of each entry are passed through to ``Question``.
        chunk: Indexable whose first element is added to every chunk-local
            neighbor index (presumably the chunk's start offset in the full
            dataset -- confirm against the caller).
        n_neighbors: Maximum number of neighbors to retrieve per query.
            Defaults to 1000, the value that was previously hard-coded.

    Returns:
        A list with one ``Question`` per row of ``q_centroids``, each holding
        the mapped neighbor ids and the matching cosine distances.
    """
    # kneighbors() raises ValueError when asked for more neighbors than there
    # are fitted samples, so a chunk smaller than n_neighbors must be clamped.
    k = min(n_neighbors, X.shape[0])
    nbrs = NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=k).fit(X)
    dist, nns = nbrs.kneighbors(q_centroids, return_distance=True)

    offset = chunk[0]
    idmap = self.idmap
    q_array = []
    for q_point, (row_nns, row_dist) in enumerate(zip(nns, dist)):
        # Convert chunk-local neighbor positions into ids via the id map.
        doc_nns = [idmap[offset + idx] for idx in row_nns]
        details = question_details[q_point]
        q_array.append(Question(details[0], details[1], doc_nns, list(row_dist)))
    return q_array
# Dataset indices are split into N chunks. The nearest top-(N*k) neighbors are extracted from each chunk, and then
# the final top-k neighbors are extracted from those.
评论列表
文章目录