def query(vec, model, k, max_search_radius):
data = model['data']
table = model['table']
random_vectors = model['random_vectors']
num_vector = random_vectors.shape[1]
# Compute bin index for the query vector, in bit representation.
bin_index_bits = (vec.dot(random_vectors) >= 0).flatten()
# Search nearby bins and collect candidates
candidate_set = set()
for search_radius in range(max_search_radius+1):
candidate_set = search_nearby_bins(bin_index_bits, table, search_radius, initial_candidates=candidate_set)
# Sort candidates by their true distances from the query
nearest_neighbors = pd.DataFrame({'id':list(candidate_set)})
candidates = data[np.array(list(candidate_set)),:]
nearest_neighbors['distance'] = pairwise_distances(candidates, vec, metric='cosine').flatten()
return nearest_neighbors.sort_values(by='distance').head(k), len(candidate_set)
评论列表
文章目录