def _find_neighborhoods(self, targets, constraints):
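    """Find the K nearest neighbors of the constraint values in the dataset.

    `targets` is a list of target column indexes; `constraints` is a dict
    mapping constraint column indexes to observed (non-NaN) values. Rows are
    compared by Euclidean distance in a dummy-coded embedding of the
    constraint columns, and ties among rows equidistant with the K-th nearest
    one are broken at random. Returns the target columns of the extracted
    dataset and, for each selected neighbor, the indexes of its min(M, K)
    nearest rows, which form its neighborhood.
    """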
    if not constraints:
        raise ValueError('No constraints in neighbor search.')
    if any(np.isnan(v) for v in constraints.values()):
        raise ValueError('NaN constraints in neighbor search.')
    # Extract the target and constraint columns from the dataset.
    lookup = list(targets) + list(constraints)
    D = self._dataset(lookup)
    # Not enough neighbors: crash for now. Workarounds include:
    # (i) reduce K, (ii) randomly drop constraints, (iii) impute dataset.
    if len(D) < self.K:
        raise ValueError('Not enough neighbors: %s'
            % ((targets, constraints),))
    # Code the dataset with a Euclidean (dummy-coded) embedding;
    # see the sketch following this method.
    N = len(targets)
    D_qr_code = self._dummy_code(D[:,:N], lookup[:N])
    D_ev_code = self._dummy_code(D[:,N:], lookup[N:])
    D_code = np.column_stack((D_qr_code, D_ev_code))
    # Run the nearest neighbor search on the constraint columns only.
    constraints_code = self._dummy_code(
        [list(constraints.values())], list(constraints.keys()))
    dist, neighbors = KDTree(D_ev_code).query(constraints_code, k=len(D))
    # Break ties among neighbors equidistant with the K-th nearest one by
    # sampling K of them at random (illustrated after this method).
    valid = [i for i, d in enumerate(dist[0]) if d <= dist[0][self.K-1]]
    if self.K < len(valid):
        neighbors = self.rng.choice(neighbors[0][valid],
            replace=False, size=self.K)
    else:
        neighbors = neighbors[0][:self.K]
    # For each selected neighbor, find its M nearest rows in the fully
    # coded dataset (targets and constraints) to form its neighborhood.
    _, ex = KDTree(D_code).query(D_code[neighbors], k=min(self.M, self.K))
    # Return the target columns of the dataset and the neighborhoods.
    return D[:,:N], ex
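

# The `_dummy_code` helper used above is not shown in this section. Below is
# a minimal sketch of what such a Euclidean embedding might look like,
# assuming categorical columns are integer-coded and one-hot encoded while
# numerical columns pass through unchanged; the helper name and the
# `categories` argument are hypothetical, not the actual implementation.
import numpy as np

def _dummy_code_sketch(rows, columns, categories):
    """Embed `rows`, restricted to `columns`, into a Euclidean space.

    `categories` is a hypothetical dict mapping a categorical column index
    to its number of levels; columns not in it are treated as numerical.
    """
    coded = []
    for row in rows:
        coded_row = []
        for value, col in zip(row, columns):
            if col in categories:
                # One-hot encode the integer-coded categorical value.
                code = [0.] * categories[col]
                code[int(value)] = 1.
                coded_row.extend(code)
            else:
                # Numerical values are used as-is.
                coded_row.append(float(value))
        coded.append(coded_row)
    return np.asarray(coded)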
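

# A self-contained illustration of the tie-breaking step above, under the
# assumption that the coded query is a single row of shape (1, d). When more
# than K rows are exactly as close as the K-th nearest one, K of the tied
# candidates are sampled at random instead of silently keeping whichever K
# the tree happens to return first. The helper name is hypothetical.
import numpy as np
from scipy.spatial import KDTree

def _select_k_neighbors_sketch(coded_data, coded_query, K, rng):
    dist, neighbors = KDTree(coded_data).query(coded_query, k=len(coded_data))
    # Indexes of all rows no farther away than the K-th nearest row.
    valid = [i for i, d in enumerate(dist[0]) if d <= dist[0][K-1]]
    if K < len(valid):
        return rng.choice(neighbors[0][valid], replace=False, size=K)
    return neighbors[0][:K]

# Usage: four points tied at distance 1 from the origin with K=2, so the two
# selected neighbors vary with the random seed rather than always being the
# first two returned by the KDTree.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    data = np.array([[1., 0.], [0., 1.], [-1., 0.], [0., -1.]])
    print(_select_k_neighbors_sketch(data, np.array([[0., 0.]]), K=2, rng=rng))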