def prune(self, question, paragraphs: List[ExtractedParagraph]):
if not self.filter_dist_one and len(paragraphs) == 1:
return paragraphs
tfidf = TfidfVectorizer(strip_accents="unicode", stop_words=self.stop.words)
text = []
for para in paragraphs:
text.append(" ".join(" ".join(s) for s in para.text))
try:
para_features = tfidf.fit_transform(text)
q_features = tfidf.transform([" ".join(question)])
except ValueError:
return []
dists = pairwise_distances(q_features, para_features, "cosine").ravel()
sorted_ix = np.lexsort(([x.start for x in paragraphs], dists)) # in case of ties, use the earlier paragraph
if self.filter_dist_one:
return [paragraphs[i] for i in sorted_ix[:self.n_to_select] if dists[i] < 1.0]
else:
return [paragraphs[i] for i in sorted_ix[:self.n_to_select]]
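The same TF-IDF cosine ranking idea as a minimal standalone sketch; rank_paragraphs, the plain-string paragraphs, and the built-in 'english' stop-word list are my own simplifications, not part of the class above.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances

def rank_paragraphs(question, paragraphs, n_to_select=2):
    # Fit TF-IDF on the paragraphs, embed the question in the same space,
    # and sort by cosine distance (smallest distance = most similar).
    tfidf = TfidfVectorizer(strip_accents="unicode", stop_words="english")
    try:
        para_features = tfidf.fit_transform(paragraphs)
        q_features = tfidf.transform([question])
    except ValueError:  # e.g. empty vocabulary after stop-word removal
        return []
    dists = pairwise_distances(q_features, para_features, "cosine").ravel()
    order = np.argsort(dists)
    return [(paragraphs[i], dists[i]) for i in order[:n_to_select]]

print(rank_paragraphs("who wrote hamlet",
                      ["Hamlet is a tragedy written by William Shakespeare.",
                       "The Eiffel Tower is in Paris.",
                       "Shakespeare wrote many plays."]))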
def dists(self, question, paragraphs: List[ExtractedParagraph]):
tfidf = TfidfVectorizer(strip_accents="unicode", stop_words=self.stop.words)
text = []
for para in paragraphs:
text.append(" ".join(" ".join(s) for s in para.text))
try:
para_features = tfidf.fit_transform(text)
q_features = tfidf.transform([" ".join(question)])
except ValueError:
return []
dists = pairwise_distances(q_features, para_features, "cosine").ravel()
sorted_ix = np.lexsort(([x.start for x in paragraphs], dists)) # in case of ties, use the earlier paragraph
if self.filter_dist_one:
return [(paragraphs[i], dists[i]) for i in sorted_ix[:self.n_to_select] if dists[i] < 1.0]
else:
return [(paragraphs[i], dists[i]) for i in sorted_ix[:self.n_to_select]]
def find_distance_matrix(self, vector, metric='cosine'):
    '''
    Compute the distance matrix between topics using cosine or Euclidean
    distance (default: cosine distance).
    '''
if metric == 'cosine':
distance_matrix = pairwise_distances(vector,
metric='cosine')
# diagonals should be exactly zero, so remove rounding errors
numpy.fill_diagonal(distance_matrix, 0)
if metric == 'euclidean':
distance_matrix = pairwise_distances(vector,
metric='euclidean')
return distance_matrix
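A quick illustration of the helper above on a made-up topic matrix; the fill_diagonal call zeroes out floating-point noise on the self-distances.

import numpy
from sklearn.metrics import pairwise_distances

vectors = numpy.array([[1.0, 0.0], [0.5, 0.5], [0.0, 1.0]])
cos_dist = pairwise_distances(vectors, metric='cosine')
numpy.fill_diagonal(cos_dist, 0)   # self-distances should be exactly zero
euc_dist = pairwise_distances(vectors, metric='euclidean')
print(cos_dist.round(3))
print(euc_dist.round(3))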
def find_similar_words(wordvecs):
""" Use loaded word embeddings to find out the most similar words in the
embedded vector space.
"""
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine
pairwise_sim_mat = 1 - pairwise_distances(wordvecs.W[1:],
metric='cosine',
# metric='euclidean',
)
    id2word = {}
    for key, value in wordvecs.word_idx_map.items():
        assert value not in id2word
        id2word[value] = key
    while True:
        word = input("Enter a word ('STOP' to quit): ")
        if word == 'STOP':
            break
        try:
            w_id = wordvecs.word_idx_map[word]
        except KeyError:
            print('%s not in the vocabulary.' % word)
            continue  # skip unknown words
        sim_w_id = pairwise_sim_mat[w_id - 1].argsort()[-10:][::-1]
        print(' '.join(id2word[i + 1] for i in sim_w_id))
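A simplified, self-contained sketch of the same lookup with a tiny made-up embedding matrix (no padding row at index 0, unlike the wordvecs object used above).

import numpy as np
from sklearn.metrics import pairwise_distances

vocab = ['king', 'queen', 'apple']
W = np.array([[0.9, 0.1], [0.85, 0.2], [0.1, 0.95]])
sim = 1 - pairwise_distances(W, metric='cosine')   # cosine similarity matrix
query = vocab.index('king')
ranked = sim[query].argsort()[::-1]                # most similar first (includes the query itself)
print([vocab[i] for i in ranked])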
def sort_by_tfidf(question, paragraphs):
tfidf = TfidfVectorizer(strip_accents="unicode", stop_words=spacy.en.STOP_WORDS, decode_error='replace')
try:
para_features = tfidf.fit_transform(paragraphs)
q_features = tfidf.transform([question])
except ValueError:
return [(i, 0.0) for i in range(len(paragraphs))]
dists = pairwise_distances(q_features, para_features, "cosine").ravel()
sorted_ix = np.lexsort((paragraphs, dists)) # in case of ties, use the earlier paragraph
return [(i, 1.0 - dists[i]) for i in sorted_ix]
def test_pairwise_distances(X_blobs):
centers = X_blobs[::100].compute()
result = dm.pairwise_distances(X_blobs, centers)
expected = sm.pairwise_distances(X_blobs.compute(), centers)
assert_eq(result, expected, atol=1e-4)
def pairwise_distances(X, Y, metric='euclidean', n_jobs=None, **kwargs):
if isinstance(Y, da.Array):
raise TypeError("`Y` must be a numpy array")
chunks = (X.chunks[0], (len(Y),))
return X.map_blocks(metrics.pairwise_distances, Y,
dtype=float, chunks=chunks,
metric=metric, **kwargs)
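A hedged usage sketch for the dask wrapper above, assuming it is in scope together with dask.array as da and sklearn's metrics module; X is a chunked dask array while Y must be an in-memory numpy array, as the TypeError check enforces.

import numpy as np
import dask.array as da

X = da.random.random((1000, 4), chunks=(250, 4))
Y = np.random.random((5, 4))
D = pairwise_distances(X, Y, metric='euclidean')   # lazy (1000, 5) dask array
print(D.shape)
print(D[:2].compute())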
def transform(self, X):
"""Compute the LLC representation of the provided data.
Parameters
----------
X : array_like or list
The local features to aggregate. They must be either nd arrays or
a list of nd arrays. In case of a list each item is aggregated
separately.
"""
# Get the local features and the number of local features per document
X, lengths = self._reshape_local_features(X)
# Preprocess the lengths list into indexes in the local feature array
starts = np.cumsum([0] + lengths).astype(int)
ends = np.cumsum(lengths).astype(int)
# Calculate the nearest neighbors
centroids = self._clusterer.cluster_centers_
distances = pairwise_distances(X, centroids)
K = self.neighbors
neighbors = np.argpartition(distances, K)[:, :K]
# Compute the llc representation
llc = np.zeros((len(lengths), self.n_codewords))
L2 = self.beta * np.eye(X.shape[1])
for i, (s, e) in enumerate(zip(starts, ends)):
for j in range(s, e):
# a = argmin_{1^T a = 1} ||x - Ca||_2^2 + \beta ||a||_2^2
C = centroids[neighbors[j]]
a = C.dot(np.linalg.inv(C.T.dot(C) + L2)).dot(X[j])
llc[i, neighbors[j]] = np.maximum(
llc[i, neighbors[j]],
a / a.sum()
)
return llc
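The closed-form solve in the inner loop appears to be the standard ridge/least-squares solution of the reconstruction problem in the comment, without the sum-to-one constraint, which is only imposed afterwards by the a / a.sum() normalisation. A small standalone check with made-up numbers:

import numpy as np

beta = 1e-3
C = np.array([[1.0, 0.0, 0.0],     # K = 2 nearest codewords, d = 3 dimensions
              [0.0, 1.0, 0.0]])
x = np.array([0.8, 0.3, 0.1])      # one local descriptor
a = C.dot(np.linalg.inv(C.T.dot(C) + beta * np.eye(3))).dot(x)
print(a, a / a.sum())              # raw code and its sum-to-one normalisation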
def computePerformance(self, instances):
X = instances.features
labels = instances.true_labels
# For unsupervised projection methods, the performance is always computed with the labels (not the families).
if hasattr(self.projection.conf, 'families_supervision'):
if self.projection.conf.families_supervision:
labels = instances.true_families
unique_labels, label_inds = np.unique(labels, return_inverse = True)
ratio = 0
for li in xrange(len(unique_labels)):
Xc = X[label_inds == li]
Xnc = X[label_inds != li]
ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean()
self.class_separation = ratio / len(unique_labels)
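A tiny numeric check of the class-separation ratio above on made-up two-class data; a small value means intra-class distances are small relative to inter-class distances.

import numpy as np
from sklearn.metrics import pairwise_distances

X = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]])
labels = np.array([0, 0, 1, 1])
ratio = 0.0
for li in np.unique(labels):
    Xc, Xnc = X[labels == li], X[labels != li]
    ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean()
print(ratio / len(np.unique(labels)))   # ~0.007 for these well-separated clusters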
def _compute_score(q, X, metric):
""" Internal method to compute the scores """
from .metrics import _scale_cosine_similarity
dist = pairwise_distances(q, X, 'cosine')
dist = dist[0]
scores = 1 - dist
scores = _scale_cosine_similarity(scores, metric=metric)
return scores
def draw_features_and_similarity(mm, words_of_interest):
rows, cols, xlabels = mm.filter_submatrix(words_of_interest, 25)
ax = plt.subplot(1, 2, 1)
plot_heat(ax, cols, xlabels, words_of_interest)
# plot_heat(ax,abs(m),numbered)
ax = plt.subplot(1, 2, 2)
t = 1 - pairwise_distances(rows, metric="cosine")
np.fill_diagonal(t, 0)
plot_heat(ax, t, words_of_interest, words_of_interest)
# plt.savefig("m1.pdf")
def score_paragraphs(self, question, paragraphs: List[ExtractedParagraphWithAnswers]):
tfidf = self._tfidf
text = []
for para in paragraphs:
text.append(" ".join(" ".join(s) for s in para.text))
try:
para_features = tfidf.fit_transform(text)
q_features = tfidf.transform([" ".join(question)])
except ValueError:
return []
q_words = {x for x in question if x.lower() not in self._stop}
q_words_lower = {x.lower() for x in q_words}
word_matches_features = np.zeros((len(paragraphs), 2))
for para_ix, para in enumerate(paragraphs):
found = set()
found_lower = set()
for sent in para.text:
for word in sent:
if word in q_words:
found.add(word)
elif word.lower() in q_words_lower:
found_lower.add(word.lower())
word_matches_features[para_ix, 0] = len(found)
word_matches_features[para_ix, 1] = len(found_lower)
tfidf = pairwise_distances(q_features, para_features, "cosine").ravel()
starts = np.array([p.start for p in paragraphs])
log_word_start = np.log(starts/400.0 + 1)
first = starts == 0
scores = tfidf * self.TFIDF_W + self.LOG_WORD_START_W * log_word_start + self.FIRST_W * first +\
self.LOWER_WORD_W * word_matches_features[:, 1] + self.WORD_W * word_matches_features[:, 0]
return scores
def rank(self, questions: List[List[str]], paragraphs: List[List[List[str]]]):
tfidf = self._tfidf
para_features = tfidf.fit_transform([" ".join(" ".join(s) for s in x) for x in paragraphs])
q_features = tfidf.transform([" ".join(q) for q in questions])
scores = pairwise_distances(q_features, para_features, "cosine")
return scores
def centroid_pairwise_dist(X,centroids):
return pairwise_distances(X,centroids,metric='euclidean')
def compute_heterogeneity(data, k, centroids, cluster_assignment):
heterogeneity = 0.0
for i in range(k):
        # Select all data points that belong to cluster i
member_data_points = data[cluster_assignment==i, :]
if member_data_points.shape[0] > 0: # check if i-th cluster is non-empty
            # Compute distances from the cluster's centroid to its member data points
distances = pairwise_distances(member_data_points, [centroids[i]], metric='euclidean')
squared_distances = distances**2
heterogeneity += np.sum(squared_distances)
return heterogeneity
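A hedged usage sketch for the two k-means helpers above, with made-up points and centroids; the assignment is computed by nearest centroid, mirroring centroid_pairwise_dist.

import numpy as np
from sklearn.metrics import pairwise_distances

data = np.array([[0.0, 0.0], [0.2, 0.0], [4.0, 4.0], [4.1, 3.9]])
centroids = np.array([[0.1, 0.0], [4.05, 3.95]])
assignment = pairwise_distances(data, centroids, metric='euclidean').argmin(axis=1)
print(compute_heterogeneity(data, k=2, centroids=centroids, cluster_assignment=assignment))  # ~0.03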
def _compute_radii(self):
"""Generate RBF radii"""
# use supplied radii if present
radii = self._get_user_components('radii')
# compute radii
if (radii is None):
centers = self.components_['centers']
n_centers = centers.shape[0]
max_dist = np.max(pairwise_distances(centers))
radii = np.ones(n_centers) * max_dist/sqrt(2.0 * n_centers)
self.components_['radii'] = radii
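A standalone sketch of the radius heuristic above with toy centers (variable names are mine): every RBF gets the same radius, the maximum pairwise distance between centers divided by sqrt(2 * n_centers).

import numpy as np
from math import sqrt
from sklearn.metrics import pairwise_distances

centers = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0]])
max_dist = np.max(pairwise_distances(centers))
radii = np.ones(len(centers)) * max_dist / sqrt(2.0 * len(centers))
print(radii)   # three identical radii, here ~0.91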
def parameter_distance(params, dist_metric='canberra', scale='minmax', return_scaled=False):
"""
Computes distances between subjects' respective parameter estimates
Parameters
----------
    params : ndarray(shape=(nsubjects, nparams))
Array of parameter estimates
dist_metric : str (default='canberra')
Distance metric to be used. Can take any value acceptable by ``sklearn.metrics.pairwise_distances``.
scale : {'minmax', 'standard', 'none'}
How to scale the parameters for distance computation
return_scaled : bool
Whether to return scaled parameters
"""
if scale != 'none':
if scale == 'minmax':
scaler = MinMaxScaler()
if scale == 'standard':
scaler = StandardScaler()
nparams = np.shape(params)[1]
for j in range(nparams):
scaledparam = scaler.fit_transform(params[:, j].reshape(-1, 1))
params[:, j] = scaledparam.flatten()
if return_scaled is True:
D = (pairwise_distances(params, metric=dist_metric), params)
else:
D = pairwise_distances(params, metric=dist_metric)
return D
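A hedged usage example for parameter_distance with toy values; it assumes MinMaxScaler, StandardScaler and pairwise_distances are already imported in the module, as the function body implies. Note that scaling modifies params in place when scale != 'none'.

import numpy as np

params = np.array([[0.1, 5.0],
                   [0.2, 4.0],
                   [0.9, 1.0]])                       # 3 subjects, 2 parameters
D = parameter_distance(params, dist_metric='canberra', scale='minmax')
print(D.shape)                                        # (3, 3) symmetric distance matrix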
def construct_k_nearest_matrix(self, dt_matrix, k):
tmp = np.array(1 - pairwise_distances(dt_matrix[dt_matrix.columns[1:]], metric = "cosine"))
similarity_matrix = pd.DataFrame(tmp, index = dt_matrix.index.tolist(), columns = dt_matrix.index.tolist())
for i in similarity_matrix.index:
tmp = [int(i),[]]
j = 0
while j < k:
max_col = similarity_matrix.loc[i].idxmax(axis = 1)
similarity_matrix.loc[i][max_col] = -1
if max_col != i:
tmp[1].append(int(max_col)) #max column name
j += 1
self.k_nearest.append(tmp)
def test_precomputed_cross_validation():
# Ensure array is split correctly
rng = np.random.RandomState(0)
X = rng.rand(20, 2)
D = pairwise_distances(X, metric='euclidean')
y = rng.randint(3, size=20)
for Est in (neighbors.KNeighborsClassifier,
neighbors.RadiusNeighborsClassifier,
neighbors.KNeighborsRegressor,
neighbors.RadiusNeighborsRegressor):
metric_score = cross_val_score(Est(), X, y)
precomp_score = cross_val_score(Est(metric='precomputed'), D, y)
assert_array_equal(metric_score, precomp_score)
def test_non_euclidean_kneighbors():
rng = np.random.RandomState(0)
X = rng.rand(5, 5)
# Find a reasonable radius.
    dist_array = pairwise_distances(X).flatten()
    dist_array = np.sort(dist_array)  # np.sort returns a copy; keep the sorted result
    radius = dist_array[15]
# Test kneighbors_graph
for metric in ['manhattan', 'chebyshev']:
nbrs_graph = neighbors.kneighbors_graph(
X, 3, metric=metric, mode='connectivity',
include_self=True).toarray()
nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())
# Test radiusneighbors_graph
for metric in ['manhattan', 'chebyshev']:
nbrs_graph = neighbors.radius_neighbors_graph(
X, radius, metric=metric, mode='connectivity',
include_self=True).toarray()
nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A)
# Raise error when wrong parameters are supplied,
X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
X_nbrs.fit(X)
assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
metric='euclidean')
X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
X_nbrs.fit(X)
assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
radius, metric='euclidean')
def test_silhouette():
# Tests the Silhouette Coefficient.
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
D = pairwise_distances(X, metric='euclidean')
# Given that the actual labels are used, we can assume that S would be
# positive.
silhouette = silhouette_score(D, y, metric='precomputed')
assert(silhouette > 0)
# Test without calculating D
silhouette_metric = silhouette_score(X, y, metric='euclidean')
assert_almost_equal(silhouette, silhouette_metric)
# Test with sampling
silhouette = silhouette_score(D, y, metric='precomputed',
sample_size=int(X.shape[0] / 2),
random_state=0)
silhouette_metric = silhouette_score(X, y, metric='euclidean',
sample_size=int(X.shape[0] / 2),
random_state=0)
assert(silhouette > 0)
assert(silhouette_metric > 0)
assert_almost_equal(silhouette_metric, silhouette)
# Test with sparse X
X_sparse = csr_matrix(X)
D = pairwise_distances(X_sparse, metric='euclidean')
silhouette = silhouette_score(D, y, metric='precomputed')
assert(silhouette > 0)
def test_spectral_amg_mode():
# Test the amg mode of SpectralClustering
centers = np.array([
[0., 0., 0.],
[10., 10., 10.],
[20., 20., 20.],
])
X, true_labels = make_blobs(n_samples=100, centers=centers,
cluster_std=1., random_state=42)
D = pairwise_distances(X) # Distance matrix
S = np.max(D) - D # Similarity matrix
S = sparse.coo_matrix(S)
try:
from pyamg import smoothed_aggregation_solver
amg_loaded = True
except ImportError:
amg_loaded = False
if amg_loaded:
labels = spectral_clustering(S, n_clusters=len(centers),
random_state=0, eigen_solver="amg")
# We don't care too much that it's good, just that it *worked*.
# There does have to be some lower limit on the performance though.
assert_greater(np.mean(labels == true_labels), .3)
else:
assert_raises(ValueError, spectral_embedding, S,
n_components=len(centers),
random_state=0, eigen_solver="amg")
def test_spectral_unknown_assign_labels():
# Test that SpectralClustering fails with an unknown assign_labels set.
centers = np.array([
[0., 0., 0.],
[10., 10., 10.],
[20., 20., 20.],
])
X, true_labels = make_blobs(n_samples=100, centers=centers,
cluster_std=1., random_state=42)
D = pairwise_distances(X) # Distance matrix
S = np.max(D) - D # Similarity matrix
S = sparse.coo_matrix(S)
assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
random_state=0, assign_labels="<unknown>")
def train_wordfilter_coefficient(self, seed_words, wordfilters):
mined_words = defaultdict(lambda: defaultdict(lambda: 0))
filter_set = {wordfilter for (rng, wordfilter) in wordfilters}
ranges = {rng for (rng, wordfilter) in wordfilters}
for num_doc, doc in enumerate(Word2vecCorpus(self.corpus_file)):
len_doc = len(doc)
for rng in ranges:
(fb, fe) = rng
if len_doc < (fe - fb + 1):
continue
words = doc[-fb:-fe]
contexts = []
for i, word in enumerate(doc):
if (i + fb < 0) or (i + fe >= len_doc):
continue
contexts.append(tuple([doc[i+r] for r in range(fb, fe+1) if r != 0]))
for i, context in enumerate(contexts):
if context in filter_set:
mined_words[(rng, context)][words[i]] += 1
result = []
seeds_idx = sorted([self.word2index[seed] for seed in seed_words])
seeds_vec = [self.word2vec_model.syn0[idx] for idx in seeds_idx]
for ((rng, context), word2freq) in sorted(mined_words.items(), key=lambda x:sum(x[1].values()), reverse=True):
word_freq = [(self.word2index[word], freq) for (word, freq) in word2freq.items()]
word_freq = [v for v in word_freq if v[0] != -1]
word_freq = sorted(word_freq)
idx = [pair[0] for pair in word_freq]
word_vec = self.word2vec_model.syn0[idx]
sum_freq = sum([v[1] for v in word_freq])
score = 0
for seed_vec in seeds_vec:
sim = 1 + -1 * pairwise_distances(word_vec, seed_vec, metric='cosine')
score += sum([wf[1] * s for wf, s in zip(word_freq, sim)]) / sum_freq
score /= len(seed_words)
result.append((context, rng, score, sum_freq))
return result
def likelihood_distance(loglik_func, data, params, diff_metric='sq', dist_metric='cosine', verbose=False):
"""
Estimates the likelihood of the data from the i'th subject using the parameter estimates of the j'th subject, for all i and j, then computes the distance between subjects' likelihood difference vectors
Parameters
----------
loglik_func : function
The log-likelihood function to be used
data : dict
Data formatted for input into the log-likelihood function
params : ndarray(shape=(nsubjects, nparams))
Array of parameter estimates
diff_metric : {'sq', 'diff', 'abs'}
Which type of difference measure to compute, 'diff' is simple subtractive difference, whereas 'sq' and 'abs' are the squared and absolute differences, respectively
dist_metric : str (default='cosine')
The pairwise distance metric to use. Any option that can be passed into ``sklearn.metrics.pairwise_distances`` can work.
verbose : bool
Whether to print out progress
Returns
-------
ndarray(shape=(nsubjects, nsubjects))
"""
nsubjects = np.shape(params)[0]
D = np.zeros([nsubjects, nsubjects])
for i in range(nsubjects):
S = data[i]['S']
A = data[i]['A']
R = data[i]['R']
if verbose is True:
print('Likelihood Differences: Subject ' + str(i))
# Compute loglikelihood for subject i with own data
LL0 = loglik_func(params=params[i, :],
states=S,
actions=A,
rewards=R)
for j in range(nsubjects):
if i !=j:
LL1 = loglik_func(params=params[j, :],
states=S,
actions=A,
rewards=R)
if diff_metric == 'diff':
D[i, j] = LL1 - LL0
elif diff_metric == 'sq':
D[i, j] = (LL1 - LL0)**2
elif diff_metric == 'abs':
D[i, j] = np.abs(LL1 - LL0)
return pairwise_distances(D, metric=dist_metric)
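A hedged toy usage of likelihood_distance: a trivial Bernoulli-style log-likelihood and made-up data, only to show the expected input and output shapes (the real function expects fitted model parameters and task data).

import numpy as np

np.random.seed(0)

def toy_loglik(params, states, actions, rewards):
    # log-likelihood of binary actions under a single choice-probability parameter
    p = params[0]
    return np.sum(actions * np.log(p) + (1 - actions) * np.log(1 - p))

data = {i: {'S': np.zeros(10), 'A': np.random.binomial(1, 0.6, 10), 'R': np.zeros(10)}
        for i in range(3)}
params = np.array([[0.4], [0.5], [0.7]])
D = likelihood_distance(toy_loglik, data, params, diff_metric='sq', dist_metric='cosine')
print(D.shape)   # (3, 3) distance matrix between subjects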
def test_precomputed(random_state=42):
"""Tests unsupervised NearestNeighbors with a distance matrix."""
# Note: smaller samples may result in spurious test success
rng = np.random.RandomState(random_state)
X = rng.random_sample((10, 4))
Y = rng.random_sample((3, 4))
DXX = metrics.pairwise_distances(X, metric='euclidean')
DYX = metrics.pairwise_distances(Y, X, metric='euclidean')
for method in ['kneighbors']:
# TODO: also test radius_neighbors, but requires different assertion
# As a feature matrix (n_samples by n_features)
nbrs_X = neighbors.NearestNeighbors(n_neighbors=3)
nbrs_X.fit(X)
dist_X, ind_X = getattr(nbrs_X, method)(Y)
# As a dense distance matrix (n_samples by n_samples)
nbrs_D = neighbors.NearestNeighbors(n_neighbors=3, algorithm='brute',
metric='precomputed')
nbrs_D.fit(DXX)
dist_D, ind_D = getattr(nbrs_D, method)(DYX)
assert_array_almost_equal(dist_X, dist_D)
assert_array_almost_equal(ind_X, ind_D)
# Check auto works too
nbrs_D = neighbors.NearestNeighbors(n_neighbors=3, algorithm='auto',
metric='precomputed')
nbrs_D.fit(DXX)
dist_D, ind_D = getattr(nbrs_D, method)(DYX)
assert_array_almost_equal(dist_X, dist_D)
assert_array_almost_equal(ind_X, ind_D)
# Check X=None in prediction
dist_X, ind_X = getattr(nbrs_X, method)(None)
dist_D, ind_D = getattr(nbrs_D, method)(None)
assert_array_almost_equal(dist_X, dist_D)
assert_array_almost_equal(ind_X, ind_D)
# Must raise a ValueError if the matrix is not of correct shape
assert_raises(ValueError, getattr(nbrs_D, method), X)
target = np.arange(X.shape[0])
for Est in (neighbors.KNeighborsClassifier,
neighbors.RadiusNeighborsClassifier,
neighbors.KNeighborsRegressor,
neighbors.RadiusNeighborsRegressor):
print(Est)
est = Est(metric='euclidean')
est.radius = est.n_neighbors = 1
pred_X = est.fit(X, target).predict(Y)
est.metric = 'precomputed'
pred_D = est.fit(DXX, target).predict(DYX)
assert_array_almost_equal(pred_X, pred_D)