def k_nearest_approx(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of cosine similarity).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, cosine similarity) pairs, in descending order
    """
    if not hasattr(self, 'lshf'):
        self.lshf = self._init_lsh_forest()
    # TODO(kelvin): make this inner product score, to be consistent with k_nearest
    distances, neighbors = self.lshf.kneighbors(vec, n_neighbors=k, return_distance=True)
    scores = np.subtract(1, distances)
    nbr_score_pairs = self.score_map(np.squeeze(neighbors), np.squeeze(scores))
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)
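
A minimal usage sketch of the approximate query above. The construction of the embeddings object and the row lookup are assumptions for illustration; only k_nearest_approx itself comes from the source.

# Hypothetical setup: `embeddings` wraps a (vocab_size, dim) word-vector matrix
# in self.array, and score_map() translates row indices back to words.
query = embeddings.array[42]                      # vector for some in-vocabulary word
top5 = embeddings.k_nearest_approx(query, k=5)    # [(word, cosine similarity), ...]
for word, sim in top5:
    print(word, sim)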
def k_nearest(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of highest inner products).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, score) pairs, in descending order
    """
    nbr_score_pairs = self.inner_products(vec)
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)[:k]

def _init_lsh_forest(self):
    """Construct an LSH forest for nearest neighbor search."""
    import sklearn.neighbors
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(self.array)
    return lshf
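
Note that sklearn.neighbors.LSHForest was deprecated in scikit-learn 0.19 and removed in 0.21, so _init_lsh_forest only runs on older releases. Below is a sketch of a replacement index for current scikit-learn, using exact brute-force cosine search so the `1 - distance` scoring in k_nearest_approx still holds; the method name _init_nn_index is an assumption, not from the source.

def _init_nn_index(self):
    """Construct an exact cosine-distance index (LSHForest no longer exists in sklearn >= 0.21)."""
    import sklearn.neighbors
    # kneighbors() on this index returns cosine distances, i.e. 1 - cosine similarity,
    # so scores can still be recovered with np.subtract(1, distances).
    nn = sklearn.neighbors.NearestNeighbors(metric='cosine', algorithm='brute')
    nn.fit(self.array)
    return nn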
import sklearn.ensemble
import sklearn.neighbors

def choose_classifier(classifier,   # which classifier to use
                      # parameters for the tree based classifiers
                      trees_n_estimators=None, trees_criterion=None,
                      trees_max_features=None, trees_max_depth=None,
                      # the ones for k-nearest-neighbors
                      knn_n_neighbors=None, knn_weights=None):
    # note that possibly inactive variables have to be optional,
    # as pysmac does not assign a value to inactive variables
    # during the minimization phase
    if classifier == 'random_forest':
        predictor = sklearn.ensemble.RandomForestClassifier(
            n_estimators=trees_n_estimators, criterion=trees_criterion,
            max_features=trees_max_features, max_depth=trees_max_depth)
    elif classifier == 'extra_trees':
        predictor = sklearn.ensemble.ExtraTreesClassifier(
            n_estimators=trees_n_estimators, criterion=trees_criterion,
            max_features=trees_max_features, max_depth=trees_max_depth)
    elif classifier == 'k_nearest_neighbors':
        predictor = sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=knn_n_neighbors, weights=knn_weights)

    # X_train, Y_train, X_test and Y_test are assumed to be defined in the
    # enclosing module; SMAC minimizes, so return the negated test accuracy.
    predictor.fit(X_train, Y_train)
    return -predictor.score(X_test, Y_test)
# defining all the parameters with respective defaults.
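
The parameter definitions announced by the comment above were cut off in the source. Purely as an illustration of the shape such a setup could take, assuming pysmac's SMAC_optimizer API: every range, default, and clause below is a placeholder, not the original code.

import pysmac

# Placeholder search space for choose_classifier(); values are illustrative only.
parameter_definition = dict(
    classifier=('categorical', ['random_forest', 'extra_trees', 'k_nearest_neighbors'], 'random_forest'),
    trees_n_estimators=('integer', [1, 100], 10),
    trees_criterion=('categorical', ['gini', 'entropy'], 'gini'),
    trees_max_features=('integer', [1, 20], 10),
    trees_max_depth=('integer', [1, 10], 4),
    knn_n_neighbors=('integer', [1, 100], 10),
    knn_weights=('categorical', ['uniform', 'distance'], 'uniform'),
)

# Conditional clauses mark which variables are active for each classifier choice,
# which is why the inactive ones must default to None above.
conditionals = [
    'trees_n_estimators | classifier in {random_forest, extra_trees}',
    'trees_criterion | classifier in {random_forest, extra_trees}',
    'trees_max_features | classifier in {random_forest, extra_trees}',
    'trees_max_depth | classifier in {random_forest, extra_trees}',
    'knn_n_neighbors | classifier in {k_nearest_neighbors}',
    'knn_weights | classifier in {k_nearest_neighbors}',
]

opt = pysmac.SMAC_optimizer()
value, best_parameters = opt.minimize(choose_classifier, 100, parameter_definition,
                                      conditional_clauses=conditionals)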
Source file: advanced_supvervised_model_trainer.py (project: healthcareai-py, author: HealthCatalyst)
def knn(self,
        scoring_metric='roc_auc',
        hyperparameter_grid=None,
        randomized_search=True,
        number_iteration_samples=10):
    """
    A light wrapper for Sklearn's knn classifier that performs randomized search over an overridable default
    hyperparameter grid.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    self.validate_classification('KNN')
    if hyperparameter_grid is None:
        neighbors = list(range(5, 26))
        hyperparameter_grid = {'n_neighbors': neighbors, 'weights': ['uniform', 'distance']}
        number_iteration_samples = 10

    print('KNN Grid: {}'.format(hyperparameter_grid))
    algorithm = get_algorithm(KNeighborsClassifier,
                              scoring_metric,
                              hyperparameter_grid,
                              randomized_search,
                              number_iteration_samples=number_iteration_samples)

    trained_supervised_model = self._create_trained_supervised_model(algorithm)
    return trained_supervised_model
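
A hedged usage sketch of the method above; constructing the trainer and loading its training data are not shown in the source, so `trainer` below is an assumption.

# Assuming `trainer` is an already-configured AdvancedSupervisedModelTrainer
# with a classification dataframe loaded:
trained_knn = trainer.knn(scoring_metric='roc_auc', randomized_search=True)

# Or override the default grid (n_neighbors 5-25, uniform/distance weights):
trained_knn = trainer.knn(
    hyperparameter_grid={'n_neighbors': [3, 7, 11], 'weights': ['distance']},
    number_iteration_samples=3)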
import time

from sklearn.neighbors import NearestNeighbors
import sklearn.metrics.pairwise


def findEps(ssearch):
    """
    Find a good epsilon value to use.
    """
    ###########################################################################
    # Calculate nearest neighbors
    ###########################################################################

    # Create a nearest neighbors model--we need 2 nearest neighbors since the
    # nearest neighbor to a point is going to be itself.
    nbrs_model = NearestNeighbors(n_neighbors=2, algorithm='brute', metric='cosine').fit(ssearch.index.index)

    t0 = time.time()

    # Find nearest neighbors.
    distances, indices = nbrs_model.kneighbors(ssearch.index.index)

    elapsed = time.time() - t0
    print('Took %.2f seconds' % elapsed)

    # Keep only the second-closest entry for each row, i.e. the nearest
    # neighbor other than the point itself.
    distances = [d[1] for d in distances]
    nn_indices = [ind[1] for ind in indices]  # index of that neighbor (not used below)

    ###########################################################################
    # Histogram the nearest neighbor distances.
    ###########################################################################

    import matplotlib.pyplot as plt
    counts, bins, patches = plt.hist(distances, bins=16)
    plt.title("Nearest neighbor distances")
    plt.xlabel("Distance")
    plt.ylabel("Frequency")

    print('\n%d bins:' % len(counts))
    countAcc = 0
    num_points = len(ssearch.index.index)

    for i in range(0, len(counts)):
        countAcc += counts[i]

        # Calculate the percentage of values which fall below the upper limit
        # of this bin.
        prcnt = float(countAcc) / float(num_points) * 100.0
        print('  %.2f%% < %.2f' % (prcnt, bins[i + 1]))
def findMinPts(ssearch, eps):
    """
    Find a good value for MinPts.
    """
    ###########################################################################
    # Count neighbors within threshold
    ###########################################################################

    print('Calculating pair-wise distances...')

    # Calculate pair-wise cosine distance for all documents.
    t0 = time.time()
    DD = sklearn.metrics.pairwise.cosine_distances(ssearch.index.index)
    elapsed = time.time() - t0
    print('  Took %.2f seconds' % elapsed)

    print('Counting number of neighbors...')
    t0 = time.time()

    # Create a list to hold the number of neighbors for each point.
    numNeighbors = [0] * len(DD)

    for i in range(0, len(DD)):
        dists = DD[i]

        count = 0
        for j in range(0, len(DD)):
            if (dists[j] < eps):
                count += 1

        numNeighbors[i] = count

    elapsed = time.time() - t0
    print('  Took %.2f seconds' % elapsed)

    ###########################################################################
    # Histogram the neighbor counts.
    ###########################################################################

    import matplotlib.pyplot as plt
    counts, bins, patches = plt.hist(numNeighbors, bins=60)
    plt.title("Number of neighbors")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Frequency")

    print('\n%d bins:' % (len(bins) - 1))
    binsStr = ''
    for b in bins:
        binsStr += '  %0.2f' % b

    print(binsStr)
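
The eps and MinPts names suggest these two helpers are tuning parameters for DBSCAN-style density clustering of the document vectors. A sketch of how the values read off the two histograms might then be used; the concrete numbers and the clustering call below are assumptions, not from the source.

# Illustrative only: replace the eps / min_samples placeholders with whatever
# the histograms above suggest for the data at hand.
from sklearn.cluster import DBSCAN

db = DBSCAN(eps=0.5, min_samples=8, metric='cosine', algorithm='brute')
labels = db.fit_predict(ssearch.index.index)
print('Found %d clusters (label -1 marks noise points)' % len(set(labels) - {-1}))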