python类NearestNeighbors()的实例源码

knn.py 文件源码 项目:tensorsne 作者: gokceneraslan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __knn_sklearn(X, k, n_jobs=-1, verbose=False, **kwargs):
    """Query a ball-tree index for the neighbours of every row of ``X``.

    Parameters
    ----------
    X : array
        Data that is both indexed and queried. Its ``dtype`` is reused for
        the returned distances.
    k : int
        Number of neighbours returned per row.
    n_jobs : int
        Forwarded to ``NearestNeighbors``.
    verbose : bool
        When true, print a progress line after indexing and after querying.
    **kwargs
        Extra keyword arguments forwarded to ``NearestNeighbors``.

    Returns
    -------
    (distances, indices) : tuple of arrays
        Both have ``k`` columns: ``k + 1`` neighbours are requested and the
        first column is discarded.
    """
    # One extra neighbour is requested because the first column is dropped
    # below (presumably it is the query point itself, since X is queried
    # against its own index).
    n_query = k + 1

    index = NearestNeighbors(n_neighbors=n_query, n_jobs=n_jobs,
                             algorithm='ball_tree', **kwargs)
    index.fit(X)
    if verbose:
        print('Indexing done.')

    distances, indices = index.kneighbors(X, n_query, return_distance=True)
    if verbose:
        print('Query done.')

    return distances[:, 1:].astype(X.dtype), indices[:, 1:]
som.py 文件源码 项目:dyfunconn 作者: makism 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def encode(self, data, metric = 'euclidean'):
        """ Encode ``data`` by replacing each sample with its nearest prototype.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        metric : string
            Distance metric handed to ``NearestNeighbors``, e.g.

             - euclidean
             - cityblock
             - l1
             - cosine

        Returns
        -------
        encoded_data : array
            Rows of ``self.protos`` selected by ``ts_symbols``. Because the
            index array keeps a neighbour axis of length 1, so does this
            array.
        ts_symbols : array, shape(n_samples, 1)
            Index of the nearest prototype for every sample (a discrete
            symbolic time series).

        Both results are also stored on the instance.
        """
        finder = NearestNeighbors(n_neighbors = 1, algorithm = 'auto', metric = metric)
        finder.fit(self.protos)

        # kneighbors returns (distances, indices); only the indices are kept.
        self.__symbols = finder.kneighbors(data)[1]
        self.__encoding = self.protos[self.__symbols]

        return (self.__encoding, self.__symbols)
mng.py 文件源码 项目:dyfunconn 作者: makism 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def encode(self, data, metric = 'euclidean'):
        """ Map every sample of ``data`` onto its closest codebook prototype.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        metric : string
            Distance metric handed to ``NearestNeighbors``, e.g.

             - euclidean
             - cityblock
             - l1
             - cosine

        Returns
        -------
        encoded_data : array
            Rows of ``self.protos`` picked by ``ts_symbols``; it keeps the
            length-1 neighbour axis of the index array.
        ts_symbols : array, shape(n_samples, 1)
            Nearest-prototype index per sample (a discrete symbolic time
            series).

        Both results are also stored on the instance.
        """
        searcher = NearestNeighbors(n_neighbors = 1, algorithm = 'auto', metric = metric)
        searcher.fit(self.protos)

        # Distances are not needed, only the index of the closest prototype.
        _, nearest = searcher.kneighbors(data)
        self.__symbols = nearest
        self.__encoding = self.protos[self.__symbols]

        return (self.__encoding, self.__symbols)
ng.py 文件源码 项目:dyfunconn 作者: makism 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def fit(self, data):
        """ Learn data, and construct a vector codebook.

        The codebook is seeded with ``self.n_protos`` randomly chosen rows of
        ``data``. Then, for ``self.iterations`` steps, one random sample is
        drawn and every prototype is pulled towards it. The pull is weighted
        by the prototype's distance rank relative to the sample; learning
        rate and neighbourhood range are interpolated geometrically from
        their ``*_i`` to their ``*_f`` values.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        Returns
        -------
        self : object
            The instance itself
        """
        n_samples, _ = data.shape
        self.protos = data[self.rng.choice(n_samples, self.n_protos), ]

        for iteration in range(self.iterations):
            sample = data[self.rng.choice(n_samples, 1), ]

            # Fraction of training completed, in [0, 1).
            t = iteration / float(self.iterations)
            lrate = self.lrate_i * (self.lrate_f / float(self.lrate_i)) ** t
            epsilon = self.epsilon_i * (self.epsilon_f / float(self.epsilon_i)) ** t

            D = pairwise_distances(sample, self.protos, metric='euclidean', n_jobs=self.n_jobs)
            # argsort of argsort turns distances into ranks (0 = closest).
            ranks = np.argsort(np.argsort(D))

            H = np.exp(-ranks / epsilon).ravel()

            # Update all prototypes at once: the per-prototype step size is
            # broadcast over the feature axis instead of looping row by row.
            self.protos += (lrate * H)[:, np.newaxis] * (sample - self.protos)

        return self
ng.py 文件源码 项目:dyfunconn 作者: makism 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def encode(self, data, metric='euclidean'):
        """ Encode ``data`` with the codebook after ordering the prototypes.

        The prototypes are embedded in one dimension with MDS and sorted by
        that coordinate, so the returned symbols index the *sorted* codebook.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        metric : string
            Distance metric handed to ``NearestNeighbors``, e.g.

             - euclidean
             - cityblock
             - l1
             - cosine

        Returns
        -------
        encoded_data : array
            Rows of the sorted codebook selected by ``ts_symbols``; it keeps
            the length-1 neighbour axis of the index array.
        ts_symbols : array, shape(n_samples, 1)
            Index into the sorted codebook of the nearest prototype for
            every sample (a discrete symbolic time series).

        Both results are also stored on the instance.
        """
        # Data mining procedure proposed in [Laskaris2004]: order the
        # prototypes along a one-dimensional MDS embedding.
        embedding = MDS(1, random_state=self.rng).fit_transform(self.protos)
        order = np.argsort(embedding.ravel())
        sprotos = self.protos[order]

        finder = NearestNeighbors(n_neighbors=1, algorithm='auto', metric=metric)
        finder.fit(sprotos)

        # kneighbors returns (distances, indices); only the indices are kept.
        self.__symbols = finder.kneighbors(data)[1]
        self.__encoding = sprotos[self.__symbols]

        return (self.__encoding, self.__symbols)
models.py 文件源码 项目:kdd2017 作者: JinpengLI 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self, n_neighbors=5, loss='L2'):
        """Set up the neighbour search model and the solver.

        Parameters
        ----------
        n_neighbors : int
            Number of neighbours used by the underlying ``NearestNeighbors``.
        loss : str or object
            ``'L1'``, ``'L2'`` or ``'SMAPE'`` selects the module-level object
            of the same name; any other value is used as given.
        """
        if loss in ['L1', 'L2', 'SMAPE']:
            loss = {'L1': L1, 'L2': L2, 'SMAPE': SMAPE}[loss]
        self.loss = loss
        self.n_neighbors = n_neighbors
        # Pass n_neighbors by keyword: current scikit-learn releases make the
        # estimator's constructor parameters keyword-only.
        self.model = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto', n_jobs=-1)
        self.solver = lambda x: solver(x, loss)
base_models.py 文件源码 项目:esper 作者: scanner-research 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def compute_distances(cls, inst_id):
        """Store on the nearest feature rows their distance to one face.

        The neighbour index is built once and cached in the module-level
        globals ``feat_nn`` / ``feat_ids``. It is built from every fifth row
        (ordered by id) whose face bounding-box height is at least 0.1.
        Previously stored ``distto`` values are cleared, then ``distto`` is
        set on up to 1000 of the indexed rows closest to the features of the
        row whose face is ``inst_id``.

        Parameters
        ----------
        inst_id
            Value matched against the ``face`` field to pick the query row.
        """
        global feat_nn
        global feat_ids

        it = cls.objects.annotate(height=F('face__bbox_y2') - F('face__bbox_y1')).filter(
            height__gte=0.1).order_by('id')
        if feat_nn is None:
            _print('Loading features...')
            feats = list(it[::5])
            feat_ids = np.array([f.id for f in feats])
            feat_vectors = [f.load_features() for f in feats]
            X = np.vstack(feat_vectors)
            _print('Constructing KNN tree...')
            feat_nn = NearestNeighbors().fit(X)
            _print('Done!')

        # Erase distances from previous computation
        prev = list(cls.objects.filter(distto__isnull=False))
        for feat in prev:
            feat.distto = None
        cls.objects.bulk_update(prev)

        query = cls.objects.get(face=inst_id).load_features()
        # kneighbors raises when asked for more neighbours than were indexed,
        # so never request more than the number of indexed rows.
        n_query = min(1000, len(feat_ids))
        dists, indices = feat_nn.kneighbors([query], n_query)

        for dist, feat_id in zip(dists[0], feat_ids[indices[0]]):
            feat = cls.objects.get(id=feat_id)
            feat.distto = dist
            feat.save()
animatedness.py 文件源码 项目:esper 作者: scanner-research 项目源码 文件源码 阅读 14 收藏 0 点赞 0 评论 0
def identity_detect(videos, exemplar, features):
    """Collect, per video, the faces whose features are close to an exemplar.

    Parameters
    ----------
    videos : sequence
        Video objects; ``videos[i]`` corresponds to ``features[i]``.
    exemplar
        Face used to look up the reference ``FaceFeatures`` row (labeler
        name ``'facenet'``).
    features : sequence of sequences
        ``features[i][j]`` is the j-th feature object of video ``i``; each
        must provide ``load_features()``.

    Returns
    -------
    list of lists
        One list per video, in the order of ``videos``, holding the feature
        objects whose distance to the exemplar does not exceed
        ``FEATURE_DISTANCE_THRESHOLD``.
    """
    log.debug('Loading features')
    indexed = [((i, j), f.load_features())
               for i, vid_features in enumerate(features)
               for j, f in enumerate(vid_features)]
    if not indexed:
        # Nothing to search: unpacking zip(*[]) would raise, and an empty
        # index cannot be fitted, so report "no matches" for every video.
        return [[] for _ in videos]
    ids, vectors = zip(*indexed)

    log.debug('Building k-nn tree')
    feat_nn = NearestNeighbors().fit(np.vstack(vectors))

    log.debug('Doing look-up')
    exemplar_vector = FaceFeatures.objects.get(
        face=exemplar, labeler__name='facenet').load_features()
    # Cap the query size at the number of indexed vectors.
    dists, id_indices = feat_nn.kneighbors([exemplar_vector], min(10000, len(vectors)))

    face_map = defaultdict(list)
    for (dist, k) in zip(dists[0], id_indices[0]):
        (i, j) = ids[k]
        # Neighbours come back sorted by distance, so stop at the first one
        # beyond the threshold.
        if dist > FEATURE_DISTANCE_THRESHOLD:
            break

        face_map[videos[i].id].append(features[i][j])

    return [face_map[video.id] for video in videos]


# Remove faces with negative coords and small height
CostFunctions.py 文件源码 项目:BatchEffectRemoval 作者: ushaham 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self,
                 MMDLayer,
                 MMDTargetTrain,
                 MMDTargetValidation_split=0.1,
                 MMDTargetSampleSize=1000,
                 n_neighbors = 25,
                 scales = None,
                 weights = None):
        """Prepare kernel scales, weights and the target train/validation split.

        Parameters
        ----------
        MMDLayer
            Stored unchanged on the instance.
        MMDTargetTrain : array
            Target samples; split into train and validation parts with
            ``train_test_split`` (``random_state=42``).
        MMDTargetValidation_split : float
            ``test_size`` passed to ``train_test_split``.
        MMDTargetSampleSize : int
            Number of rows drawn (with replacement) per repetition when the
            scales are estimated; also stored on the instance.
        n_neighbors : int
            Number of neighbours used when the scales are estimated.
        scales : sequence or None
            Kernel scales. When ``None`` they are set to
            ``[med/2, med, med*2]``, where ``med`` is a median of
            nearest-neighbour distances in random subsamples.
        weights : sequence or None
            Scale weights. When ``None`` a default is derived from the
            number of scales (see the note in the body).
        """
        # ``is None`` rather than ``== None``: comparing an array argument
        # with ``==`` is element-wise and has no single truth value.
        if scales is None:
            print("setting scales using KNN")
            med = np.zeros(20)
            # NOTE(review): the loop starts at 1, so med[0] stays 0 and still
            # takes part in the final median. Kept as-is to preserve the
            # default scales; confirm whether range(20) was intended.
            for ii in range(1,20):
                sample = MMDTargetTrain[np.random.randint(MMDTargetTrain.shape[0], size=MMDTargetSampleSize),:]
                nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(sample)
                distances,dummy = nbrs.kneighbors(sample)
                #nearest neighbor is the point so we need to exclude it
                med[ii]=np.median(distances[:,1:n_neighbors])
            med = np.median(med)
            scales = [med/2, med, med*2] # CyTOF
            print(scales)
        scales = K.variable(value=np.asarray(scales))
        if weights is None:
            print("setting all scale weights to 1")
            # NOTE(review): this assigns the *number* of scales, not a vector
            # of ones as the message says. Kept as-is; confirm intent.
            weights = K.eval(K.shape(scales)[0])
        weights = K.variable(value=np.asarray(weights))
        self.MMDLayer =  MMDLayer
        MMDTargetTrain, MMDTargetValidation = train_test_split(MMDTargetTrain, test_size=MMDTargetValidation_split, random_state=42)
        self.MMDTargetTrain = K.variable(value=MMDTargetTrain)
        self.MMDTargetTrainSize = K.eval(K.shape(self.MMDTargetTrain)[0])
        self.MMDTargetValidation = K.variable(value=MMDTargetValidation)
        self.MMDTargetValidationSize = K.eval(K.shape(self.MMDTargetValidation)[0])
        self.MMDTargetSampleSize = MMDTargetSampleSize
        self.kernel = self.RaphyKernel
        self.scales = scales
        self.weights = weights


    #calculate the raphy kernel applied to all entries in a pairwise distance matrix
NearestNeighbors.py 文件源码 项目:intelligentCampus 作者: Jackal007 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def __init__(self):
        """Initialise the base classifier and attach a 2-neighbour ball-tree model."""
        SingleClassifier.SingleClassifier.__init__(self)
        # weak classifier; other algorithm choices are 'brute' and 'kd_tree'
        self.clf = NearestNeighbors(n_neighbors=2, algorithm='ball_tree')
abod.py 文件源码 项目:kenchi 作者: Y-oHr-N 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fit(self, X, y=None):
        """Fit the detector on the training samples.

        A nearest-neighbour index is built on ``X``. The anomaly scores are
        then computed and the decision threshold is set to their
        ``100 * (1 - fpr)``-th percentile.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        y : ignored
            Not used by this method.

        Returns
        -------
        self : detector
            Return self.
        """

        X = check_array(X)

        knn_params = dict(
            metric=self.metric,
            metric_params=self.metric_params,
            n_jobs=self.n_jobs,
            n_neighbors=self.n_neighbors,
            p=self.p,
        )
        self._knn = NearestNeighbors(**knn_params).fit(X)

        self.y_score_ = self.anomaly_score()

        percentile = 100.0 * (1.0 - self.fpr)
        self.threshold_ = np.percentile(self.y_score_, percentile)

        return self
neighbors.py 文件源码 项目:kenchi 作者: Y-oHr-N 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fit(self, X, y=None):
        """Fit the detector on the training samples.

        Builds the nearest-neighbour index on ``X``, computes the anomaly
        scores and derives the threshold as the ``100 * (1 - fpr)``-th
        percentile of those scores.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        y : ignored
            Not used by this method.

        Returns
        -------
        self : detector
            Return self.
        """

        X = check_array(X)

        searcher = NearestNeighbors(
            metric=self.metric,
            metric_params=self.metric_params,
            n_jobs=self.n_jobs,
            n_neighbors=self.n_neighbors,
            p=self.p,
        )
        self._knn = searcher.fit(X)

        scores = self.anomaly_score()
        self.y_score_ = scores
        self.threshold_ = np.percentile(scores, 100.0 * (1.0 - self.fpr))

        return self
util.py 文件源码 项目:soinn 作者: fukatani 项目源码 文件源码 阅读 46 收藏 0 点赞 0 评论 0
def calc_mahalanobis(x, y, n_neighbors):
    """Find the Mahalanobis nearest neighbours in ``x`` of the rows of ``y``.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Reference samples; the covariance is estimated from them and the
        neighbour index is fitted on them.
    y : array-like, shape (n_queries, n_features)
        Query samples.
    n_neighbors : int
        Number of neighbours to return per query.

    Returns
    -------
    Whatever ``NearestNeighbors.kneighbors`` returns for ``y`` (distances
    and indices).
    """
    from sklearn.neighbors import NearestNeighbors

    # x is fitted with one sample per row, so the covariance must be taken
    # across columns (features). np.cov defaults to rows-as-variables, which
    # would produce an (n_samples, n_samples) matrix instead.
    # atleast_2d keeps the single-feature case a matrix.
    cov = np.atleast_2d(np.cov(x, rowvar=False))

    nn = NearestNeighbors(n_neighbors=n_neighbors,
                          algorithm='brute',
                          metric='mahalanobis',
                          metric_params={'V': cov})
    return nn.fit(x).kneighbors(y)
knn_recsys.py 文件源码 项目:rec-sys-experiments 作者: rnowling 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def train_and_score(metric, training, testing, ks):
    """Fit a brute-force k-NN model and print the AUC for each ``k`` in ``ks``.

    For every row of ``testing`` an equal number of interacted (non-zero)
    and non-interacted columns is sampled at random. Each sampled column is
    scored with the mean of that column over the row's ``k`` nearest
    training rows, and the scores are compared with the 1/0 labels via
    ``roc_auc_score``.

    NOTE(review): this is Python 2 code (print statements, ``xrange``).
    NOTE(review): ``scores`` is created but never filled or returned in the
    visible code; the snippet may be truncated.
    NOTE(review): presumably ``training`` / ``testing`` are scipy sparse
    matrices (two-array ``nonzero()``, 2-D result of ``mean(axis=0)``);
    confirm against the caller.
    """
    print "Training and scoring"
    scores = []
    knn = NearestNeighbors(metric=metric, algorithm="brute")
    knn.fit(training)
    for k in ks:
        print "Evaluating for", k, "neighbors"
        # Indices of the k nearest training rows for every testing row.
        neighbor_indices = knn.kneighbors(testing,
                                          n_neighbors=k,
                                          return_distance=False)

        all_predicted_scores = []
        all_labels = []
        for user_id in xrange(testing.shape[0]):
            user_row = testing[user_id, :]

            # Columns with a non-zero entry versus all remaining columns.
            _, interaction_indices = user_row.nonzero()
            interacted = set(interaction_indices)
            non_interacted = set(xrange(testing.shape[1])) - interacted

            # Balance the classes: draw the same number from each group.
            n_samples = min(len(non_interacted), len(interacted))
            sampled_interacted = random.sample(interacted, n_samples)
            sampled_non_interacted = random.sample(non_interacted, n_samples)

            # Positives first, then negatives; labels follow the same order.
            indices = list(sampled_interacted)
            indices.extend(sampled_non_interacted)
            labels = [1] * n_samples
            labels.extend([0] * n_samples)

            # Column-wise mean over the neighbouring training rows.
            neighbors = training[neighbor_indices[user_id, :], :]
            predicted_scores = neighbors.mean(axis=0)
            for idx in indices:
                all_predicted_scores.append(predicted_scores[0, idx])
            all_labels.extend(labels)

        print len(all_labels), len(all_predicted_scores)

        auc = roc_auc_score(all_labels, all_predicted_scores)

        print "k", k, "AUC", auc
CFmodel.py 文件源码 项目:Machine_Learning_Playground 作者: yao23 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self):
        """Attach a module-named logger and an unfitted 15-neighbour model."""
        self.log = logging.getLogger(__name__)
        self.knnModel = NearestNeighbors(n_neighbors=15)
KNNmodel.py 文件源码 项目:Machine_Learning_Playground 作者: yao23 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def train(self, userFeatureTable, ratingsMat):
        """Fit a 10-neighbour ball-tree model on the user feature table.

        The ``"age"`` column of ``userFeatureTable`` is divided by 10 *in
        place* before fitting, so the caller's table is modified and calling
        this twice on the same table rescales it twice.

        Parameters
        ----------
        userFeatureTable : table with an ``"age"`` column and an index
            One row of features per user; stored on the instance.
        ratingsMat
            Rating matrix; stored on the instance unchanged.
        """
        # ad hoc fix, make sure feature's range is similar
        scaled_age = userFeatureTable.loc[:, "age"] / 10.
        userFeatureTable.loc[:, "age"] = scaled_age

        model = NearestNeighbors(n_neighbors=10, algorithm='ball_tree')
        self.knnModel = model.fit(userFeatureTable)

        # ratingMat is the rating matrix
        self.ratingsMat = ratingsMat
        self.userFeatureTable = userFeatureTable
        # the actual order seen by the knnmodel
        self.userIds = self.userFeatureTable.index
user_user_cf.py 文件源码 项目:hybrid-rs-trainner 作者: SeniorSA 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def find_knn(self, target_matrix, target_features):
        """Fit a neighbour model on ``target_matrix`` and query it.

        The neighbour count and the search algorithm come from the
        instance's stored arguments.

        Parameters
        ----------
        target_matrix : object with a ``values`` attribute
            Data the model is fitted on.
        target_features
            Query rows passed to ``kneighbors``.

        Returns
        -------
        (distances, indexes) : tuple
            Result of ``kneighbors`` for ``target_features``.
        """
        model = NearestNeighbors(n_neighbors=self.__args.n_neighbors,
                                 algorithm=self.__args.alg)
        fitted = model.fit(target_matrix.values)
        return fitted.kneighbors(target_features)
produtos.py 文件源码 项目:hybrid-rs-trainner 作者: SeniorSA 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def fit(atributos):
    """Return a Euclidean ``NearestNeighbors`` model fitted on ``atributos``."""
    model = NearestNeighbors(metric='euclidean')
    model.fit(atributos)
    return model
word_vector_box.py 文件源码 项目:cervantes 作者: textclf 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def index(self, metric='cosine'):
        """Build the nearest-neighbour index over ``self.W``.

        Parameters
        ----------
        metric : str
            Distance metric; ``'cosine'`` selects the brute-force algorithm,
            any other metric uses ``'auto'``.

        Returns
        -------
        self

        Raises
        ------
        WordVectorBoxException
            If the ``SKLEARN`` flag is false.
        """
        if not SKLEARN:
            raise WordVectorBoxException("Needs sklearn to work")

        if metric == 'cosine':
            algorithm = 'brute'
        else:
            algorithm = 'auto'

        self._nn = NearestNeighbors(metric=metric, algorithm=algorithm)
        self._nn.fit(self.W)
        return self


问题


面经


文章

微信
公众号

扫码关注公众号