def _find_neighborhoods(self, targets, constraints):
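    """Find the K nearest neighbors of the constraint values in the dataset.

    `targets` is a list of target column indexes; `constraints` is a dict
    mapping constraint column indexes to observed (non-NaN) values. Rows are
    compared by Euclidean distance in a dummy-coded embedding of the
    constraint columns, and ties among rows equidistant with the K-th nearest
    one are broken at random. Returns the target columns of the extracted
    dataset and, for each selected neighbor, the indexes of its min(M, K)
    nearest rows, which form its neighborhood.
    """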
    if not constraints:
        raise ValueError('No constraints in neighbor search.')
    if any(np.isnan(v) for v in constraints.values()):
        raise ValueError('NaN constraints in neighbor search.')
    # Extract the target and constraint columns from the dataset.
    lookup = list(targets) + list(constraints)
    D = self._dataset(lookup)
    # Not enough neighbors: crash for now. Workarounds include:
    # (i) reduce K, (ii) randomly drop constraints, (iii) impute dataset.
    if len(D) < self.K:
        raise ValueError('Not enough neighbors: %s'
            % ((targets, constraints),))
    # Code the dataset with a Euclidean (dummy-coded) embedding;
    # see the sketch following this method.
    N = len(targets)
    D_qr_code = self._dummy_code(D[:,:N], lookup[:N])
    D_ev_code = self._dummy_code(D[:,N:], lookup[N:])
    D_code = np.column_stack((D_qr_code, D_ev_code))
    # Run the nearest neighbor search on the constraint columns only.
    constraints_code = self._dummy_code(
        [list(constraints.values())], list(constraints.keys()))
    dist, neighbors = KDTree(D_ev_code).query(constraints_code, k=len(D))
    # Break ties among neighbors equidistant with the K-th nearest one by
    # sampling K of them at random (illustrated after this method).
    valid = [i for i, d in enumerate(dist[0]) if d <= dist[0][self.K-1]]
    if self.K < len(valid):
        neighbors = self.rng.choice(neighbors[0][valid],
            replace=False, size=self.K)
    else:
        neighbors = neighbors[0][:self.K]
    # For each selected neighbor, find its M nearest rows in the fully
    # coded dataset (targets and constraints) to form its neighborhood.
    _, ex = KDTree(D_code).query(D_code[neighbors], k=min(self.M, self.K))
    # Return the target columns of the dataset and the neighborhoods.
    return D[:,:N], ex
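

# The `_dummy_code` helper used above is not shown in this section. Below is
# a minimal sketch of what such a Euclidean embedding might look like,
# assuming categorical columns are integer-coded and one-hot encoded while
# numerical columns pass through unchanged; the helper name and the
# `categories` argument are hypothetical, not the actual implementation.
import numpy as np

def _dummy_code_sketch(rows, columns, categories):
    """Embed `rows`, restricted to `columns`, into a Euclidean space.

    `categories` is a hypothetical dict mapping a categorical column index
    to its number of levels; columns not in it are treated as numerical.
    """
    coded = []
    for row in rows:
        coded_row = []
        for value, col in zip(row, columns):
            if col in categories:
                # One-hot encode the integer-coded categorical value.
                code = [0.] * categories[col]
                code[int(value)] = 1.
                coded_row.extend(code)
            else:
                # Numerical values are used as-is.
                coded_row.append(float(value))
        coded.append(coded_row)
    return np.asarray(coded)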
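

# A self-contained illustration of the tie-breaking step above, under the
# assumption that the coded query is a single row of shape (1, d). When more
# than K rows are exactly as close as the K-th nearest one, K of the tied
# candidates are sampled at random instead of silently keeping whichever K
# the tree happens to return first. The helper name is hypothetical.
import numpy as np
from scipy.spatial import KDTree

def _select_k_neighbors_sketch(coded_data, coded_query, K, rng):
    dist, neighbors = KDTree(coded_data).query(coded_query, k=len(coded_data))
    # Indexes of all rows no farther away than the K-th nearest row.
    valid = [i for i, d in enumerate(dist[0]) if d <= dist[0][K-1]]
    if K < len(valid):
        return rng.choice(neighbors[0][valid], replace=False, size=K)
    return neighbors[0][:K]

# Usage: four points tied at distance 1 from the origin with K=2, so the two
# selected neighbors vary with the random seed rather than always being the
# first two returned by the KDTree.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    data = np.array([[1., 0.], [0., 1.], [-1., 0.], [0., -1.]])
    print(_select_k_neighbors_sketch(data, np.array([[0., 0.]]), K=2, rng=rng))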