Example source code using Python's pairwise_distances()

space.py (project: semspaces, author: pmandera)
def all_distances(self, l1, metric='cosine'):
        """Return distance matrix with distances to all words."""

        l1_vecs = self.word_vectors_matrix(l1)
        l1_labels = [self.label(e) for e in l1]

        sims = self.pairwise_distances(l1_vecs, self.vectors, metric=metric)

        return pd.DataFrame(sims, l1_labels, self.words)
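
all_distances(), like the other space.py methods below, wraps scikit-learn's pairwise_distances inside the semspaces word-space class, so it relies on attributes such as self.vectors, self.words, word_vectors_matrix() and label(). A minimal standalone sketch of the same labelled-distance-matrix pattern, using made-up toy vectors instead of the semspaces API:

import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances

# Hypothetical toy vocabulary and 3-dimensional word vectors (illustration only).
words = ['cat', 'dog', 'car']
vectors = np.array([[0.9, 0.1, 0.0],
                    [0.8, 0.2, 0.1],
                    [0.0, 0.1, 0.9]])

# Cosine distances from a query subset to every word, returned as a labelled
# DataFrame, mirroring what all_distances() produces.
query = ['cat', 'dog']
query_vecs = vectors[[words.index(w) for w in query]]
dists = pairwise_distances(query_vecs, vectors, metric='cosine')
print(pd.DataFrame(dists, index=query, columns=words))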
space.py (project: semspaces, author: pmandera)
def pair_distance(self, w1, w2, metric='cosine'):
        """Calculate distance between two words."""

        distance = self.pairwise_distances(
            self.get_vector(w1),
            self.get_vector(w2), metric=metric)

        return distance[0, 0]
space.py (project: semspaces, author: pmandera)
def matrix_distances(self, l1, l2=None, metric='cosine'):
        """Return distance matrix with distances between pairs of words."""

        l1_vecs = self.word_vectors_matrix(l1)
        l1_labels = [self.label(e) for e in l1]

        if l2 is None:
            sims = self.pairwise_distances(l1_vecs, metric=metric)
            l2_labels = l1_labels
        else:
            l2_vecs = self.word_vectors_matrix(l2)
            l2_labels = [self.label(e) for e in l2]
            sims = self.pairwise_distances(l1_vecs, l2_vecs, metric=metric)

        return pd.DataFrame(sims, l1_labels, l2_labels)
tsne.py (project: CNN_Visualization, author: albioTQ)
def computeProbabilities(X, perplexity=30.0, tolerance=1e-5):
    #Perform an initial dimensionality reduction
    pca = PCA(n_components=50)

    X = pca.fit_transform(X)

    numSamples = X.shape[0]

    P = np.zeros((numSamples, numSamples))

    D = pairwise_distances(X, squared=True)

    for i in range(numSamples):
        indices = np.concatenate((np.arange(i), np.arange(i + 1, numSamples)))

        distancesFromI = D[i, indices]

        sigma = binarySearch(computePerplexity, distancesFromI, tolerance, perplexity)

        precision = 1.0 / sigma
        #Compute a "row" of matrix P: the probabilities wrt point I
        PwrtI = np.exp(- distancesFromI * precision)
        PwrtI /= sum(PwrtI)
        #Insert an element corresponding to I wrt I
        PwrtI = np.concatenate((PwrtI[0:i], [0.0], PwrtI[i:numSamples]))
        #Insert the row
        P[i, :] = PwrtI

    return P
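
In computeProbabilities, binarySearch and computePerplexity are helper functions defined elsewhere in tsne.py and are not shown in this excerpt. As a rough sketch of the underlying idea (an assumption about the helper, not the project's actual code), the perplexity of one row of conditional probabilities for a given precision can be computed like this:

import numpy as np

def compute_perplexity_sketch(distances, precision=1.0):
    # Illustrative only: perplexity 2**H(P_i) of the conditional distribution
    # induced by one row of squared distances and a fixed precision 1/sigma.
    p = np.exp(-distances * precision)
    p /= p.sum()
    entropy = -np.sum(p * np.log2(p + 1e-12))
    return 2.0 ** entropy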
evaluation.py (project: SpindleNet, author: yokattame)
def main(args):
  PF, PL, GF, GL = _get_test_data(args.result_dir)
  D = pairwise_distances(GF, PF, metric=args.method, n_jobs=-2)

  gallery_labels_set = np.unique(GL)

  for label in PL:
    if label not in gallery_labels_set:
      print 'Probe-id is out of Gallery-id sets.'

  Times = 100
  k = 20

  res = np.zeros(k)

  gallery_labels_map = [[] for i in xrange(gallery_labels_set.size)]
  for i, g in enumerate(GL):
    gallery_labels_map[g].append(i)

  for __ in xrange(Times):
    # Randomly select one gallery sample per label selected
    newD = np.zeros((gallery_labels_set.size, PL.size))
    for i, g in enumerate(gallery_labels_set):
      j = np.random.choice(gallery_labels_map[g])
      newD[i, :] = D[j, :]
    # Compute CMC
    res += _cmc_core(newD, gallery_labels_set, PL, k)
  res /= Times

  for topk in [1, 5, 10, 20]:
    print "{:8}{:8.1%}".format('top-' + str(topk), res[topk - 1])
feat_test.py (project: DeepID2, author: chenzeyuczy)
def getDist(feat1, feat2, metric):
    pair_num = len(feat1)
    import sklearn.metrics.pairwise as pw
    mt = pw.pairwise_distances(feat1, feat2, metric=metric)
    distance = np.empty((pair_num,))
    for i in xrange(pair_num):
        distance[i] = mt[i,i]
    return distance
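
getDist builds the full pair_num x pair_num distance matrix and then keeps only its diagonal, i.e. the distance between the i-th rows of feat1 and feat2. For aligned pairs the same result can be obtained without the full matrix via scikit-learn's paired_distances; a small sketch with placeholder random features:

import numpy as np
from sklearn.metrics.pairwise import paired_distances

# Placeholder features: 5 aligned pairs of 128-dimensional vectors.
feat1 = np.random.rand(5, 128)
feat2 = np.random.rand(5, 128)

# Distance between feat1[i] and feat2[i] only, no full pairwise matrix needed.
distance = paired_distances(feat1, feat2, metric='cosine')
print(distance.shape)  # (5,)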

# Extract feature via network.
graphssl.py (project: graph-based-semi-supervised-learning, author: deerishi)
def constructCovarianceMatrix(self):

        #this function constructs the covariance matrix for the dataset and then does a label propagation over it

        self.covarianceMatrix=np.cov(self.trainVectorsPCA.T) #as numpy treats them as column vectors
        self.inverseCovarianceMatrix=np.linalg.inv(self.covarianceMatrix)

        #compute the cholesky decomposition and then transform the data into the new space

        self.L_cov=np.linalg.cholesky(self.covarianceMatrix)
        self.allDataCov=np.dot(self.allDataPCA,self.L_cov.T)
        self.pwdis=pairwise_distances(self.allDataCov)
        self.D=np.zeros(self.pwdis.shape)
        projectedDigits=TSNE(random_state=randomState).fit_transform(self.allDataCov)
        plt.figure()
        plt.scatter(projectedDigits[:,0],projectedDigits[:,1],c=self.labels)
        plt.title('Data projected by Covariance Matrix in Mahalanobis metric')
        plt.savefig(pp,format='pdf')
        plt.close()

        ks=[3,5,7,10,12,15,20,22,25,27,30,33,35,37,40,43,45,47,50,53,55,57,60,65]
        accs=[]
        for k in ks:
            for i in range(0,self.pwdis.shape[0]):
                l1=self.pwdis[i].tolist()
                #print 'l1 is ',l1,'\n\n'
                allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
                #now set all the weights except the k nearest to 0
                self.pwdis[i,allnearestNeighbours[k:]]=0
                self.D[i,i]=sum(self.pwdis[i]+0.01)

            print 'accuracy by using Covariance Matrix for Mahalanobis Distance for k= ',k,'\n'
            accs.append(self.labelPropogation())

        plt.figure()
        plt.plot(ks,accs)
        plt.title('Plot of accuracy vs k using Covariance Matrix in  Mahalanobis metric')
        plt.savefig(pp,format='pdf')
graphssl.py 文件源码 项目:graph-based-semi-supervised-learning 作者: deerishi 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def constructEucleadianGaussianKernel(self):

        rawPwdis=pairwise_distances(self.allDataPCA)

        maccs=[]
        ks=[3,5,7,10,12,15,20,22,25,27,30,33,35,37,40,43,45,47,50,53,55,57,60,65]
        for k in ks:
            sigmas=[1,1.5,2,2.5,3,3.5]
            accs=[]
            for sigma in sigmas:
                #rebuild the Gaussian kernel from the unmodified distances for each sigma
                self.pwdis=np.exp(-rawPwdis/(2*sigma*sigma))
                self.D=np.zeros(self.pwdis.shape)
                for i in range(0,self.pwdis.shape[0]):
                    l1=self.pwdis[i].tolist()
                    #print 'l1 is ',l1,'\n\n'
                    allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
                    #now set all the weights except the k nearest to 0
                    self.pwdis[i,allnearestNeighbours[k:]]=0
                    self.D[i,i]=sum(self.pwdis[i])

                    #here we make no transformation on the dataset
                print 'accuracy for constructEucleadianGaussianKernel with k=',k,' and sigma =',sigma,' is \n'
                accs.append(self.labelPropogation())
            maccs.append(np.mean(accs))

        plt.figure()
        plt.plot(ks,maccs)
        plt.title('Accuracy vs k for Euclidean Gaussian Kernel')
        plt.savefig(pp,format='pdf')
        plt.close()
graphssl.py (project: graph-based-semi-supervised-learning, author: deerishi)
def constructEucleadianGaussianKernelNoPca(self):

        rawPwdis=pairwise_distances(self.allVectors)

        maccs=[]
        ks=[3,5,7,10,12,15,20,22,25,27,30,33,35,37,40,43,45,47,50,53,55,57,60,65]
        for k in ks:
            sigmas=[1,1.5,2,2.5,3,3.5]
            accs=[]
            for sigma in sigmas:
                #rebuild the Gaussian kernel from the unmodified distances for each sigma
                self.pwdis=np.exp(-rawPwdis/(2*sigma*sigma))
                self.D=np.zeros(self.pwdis.shape)
                for i in range(0,self.pwdis.shape[0]):
                    l1=self.pwdis[i].tolist()
                    #print 'l1 is ',l1,'\n\n'
                    allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
                    #now set all the weights except the k nearest to 0
                    self.pwdis[i,allnearestNeighbours[k:]]=0
                    self.D[i,i]=sum(self.pwdis[i])

                    #here we make no transformation on the dataset
                print 'accuracy for constructEucleadianGaussianKernel with k=',k,' and sigma =',sigma,' is \n'
                accs.append(self.labelPropogation())
            maccs.append(np.mean(accs))

        plt.figure()   
        plt.plot(ks,maccs)
        plt.title('Accuracy vs k for Euclidean Gaussian Kernel')
        plt.savefig(pp,format='pdf')           
        plt.close()
graphssl.py (project: graph-based-semi-supervised-learning, author: deerishi)
def constructSimilartyMatrixCosine(self):
        #This is a simple k nearest neighbour approach based on the cosine distance
        #for this take: from modshogun import RealFeatures, MulticlassLabels
        #then find the k nearest neighbours for each node 

        #now we have all the pairwise cosine distances between all the sentences
        #now we need to do a knnNeighbour search
        #now we can construct the diagonal weight matrix, which has the sum of all the weights
        ks=[3,5,7,10,12,15,20,22,25,27,30,33,35,37,40,43,45,47,50,53,55,57,60,65]
        accs=[]
        for k in ks:

            self.pwdis=pairwise_distances(self.allVectors,metric='cosine')
            self.D=np.zeros(self.pwdis.shape)
            for i in range(0,self.pwdis.shape[0]):
                l1=self.pwdis[i].tolist()
                #print 'l1 is ',l1,'\n\n'
                allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
                #now set all the weights except the k nearest to 0
                self.pwdis[i,allnearestNeighbours[k:]]=0
                self.D[i,i]=sum(self.pwdis[i])

            print 'accuracy on non pca data using cosine and k= ',k,' is ','\n'
            accs.append(self.labelPropogation())

        plt.figure()
        plt.plot(ks,accs)
        plt.title('Plot of accuracy vs k using cosine non PCA data')
        plt.savefig(pp,format='pdf')
        plt.close()
graphssl.py (project: graph-based-semi-supervised-learning, author: deerishi)
def constructSimilartyMatrixCosinePCA(self):
        #This is a simple k nearest neighbour approach based on the cosine distance
        #for this take: from modshogun import RealFeatures, MulticlassLabels
        #then find the k nearest neighbours for each node 
        ks=[3,5,7,10,12,15,20,22,25,27,30,33,35,37,40,43,45,47,50,53,55,57,60,65]
        accs=[]
        for k in ks:
            self.pwdis=pairwise_distances(self.allDataPCA,metric='cosine')
            #now we have all the pairwise cosine distances between all the sentences
            #now we need to do a knnNeighbour search
            #now we can construct the diagonal weight matrix, which has the sum of all the weights
            self.D=np.zeros(self.pwdis.shape)
            for i in range(0,self.pwdis.shape[0]):
                l1=self.pwdis[i].tolist()
                #print 'l1 is ',l1,'\n\n'
                allnearestNeighbours=sorted(range(len(l1)),key=lambda i : l1[i])
                #now set all the weights except the k nearest to 0
                self.pwdis[i,allnearestNeighbours[k:]]=0
                self.D[i,i]=sum(self.pwdis[i])

            print 'Now computing accuracy for cosine metric on PCA data'
            accs.append(self.labelPropogation())

        plt.figure()
        plt.plot(ks,accs)
        plt.title('Plot of accuracy vs k using cosine  PCA data')
        plt.savefig(pp,format='pdf')    
        plt.close()

        #now we have the weight matrix graph based on the cosine distance
        #print 'self.D is ',self.D
generate_lsi_sim.py (project: kaggle-quora-solution-8th, author: qqgeogor)
def calc_cosine_dist(text_a ,text_b):
    return pairwise_distances(text_a, text_b, metric='cosine')[0][0]
generate_tfidf_sim.py (project: kaggle-quora-solution-8th, author: qqgeogor)
def calc_cosine_dist(text_a ,text_b):
    return pairwise_distances(text_a, text_b, metric='cosine')[0][0]
generate_sklearn_tfidf_sim.py (project: kaggle-quora-solution-8th, author: qqgeogor)
def calc_cosine_dist(text_a ,text_b):
    return pairwise_distances(text_a, text_b, metric='cosine')[0][0]
generate_selftrained_glove_sim_dist_diff.py (project: kaggle-quora-solution-8th, author: qqgeogor)
def calc_cosine_dist(text_a ,text_b, metric = 'euclidean'):
    return pairwise_distances([text_a], [text_b], metric = metric)[0][0]
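
The calc_cosine_dist helpers above expect their arguments to already be row vectors: sparse LSI/TF-IDF rows in the first three files, and plain 1-D embedding arrays (hence the wrapping in lists) in the last one. A hedged usage sketch of the TF-IDF case, with made-up example sentences:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances

def calc_cosine_dist(text_a, text_b):
    return pairwise_distances(text_a, text_b, metric='cosine')[0][0]

# Illustrative usage: vectorize two questions and compare them.
tfidf = TfidfVectorizer().fit(['how to learn python', 'how to learn java'])
a = tfidf.transform(['how to learn python'])
b = tfidf.transform(['how to learn java'])
print(calc_cosine_dist(a, b))  # smaller means more similar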
rocchioclassifier.py (project: Quadflor, author: quadflor)
def predict_proba(self, X):
        """
        Return a matrix of probability-like scores for each sample belonging to each class.
        The matrix has shape = [n_samples, n_classes], where n_samples is the
        size of the first dimension of the input matrix X and n_classes is the
        number of classes determined from the parameter 'y' during training.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Prediction vectors, where n_samples is the number of samples and
            n_features is the number of features.
        """
        probabilities = np.zeros((X.shape[0], self.y.shape[1]), dtype=np.float64)
        distances = (pairwise_distances(X, self.centroids_, metric=self.metric))

        # in order to get probability like values, we ensure that the closer
        # the distance is to zero, the closer the probability is to 1
        if(self.metric == 'cosine'):
            distances = 1 - distances
        else:
            # in the case of euclidean distance metric we need to normalize by the largest distance
            # to get a value between 0 and 1
            distances = 1 - (distances / distances.max())

        # map back onto a matrix containing all labels
        probabilities[:,self._mem_original_mapping] = distances

        return probabilities
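
predict_proba assumes that fit() has already stored self.centroids_ (one row per class), self.y and self._mem_original_mapping. A minimal standalone sketch of the same "distance to centroid turned into a probability-like score" idea, outside the Quadflor class:

import numpy as np
from sklearn.metrics import pairwise_distances

def centroid_scores(X, centroids, metric='cosine'):
    # The smaller the distance to a class centroid, the closer the score is to 1.
    d = pairwise_distances(X, centroids, metric=metric)
    if metric == 'cosine':
        return 1 - d
    return 1 - d / d.max()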
solvers.py (project: ecml17, author: gmum)
def assign_to_closest(X, centers, metric='euclidean'):
    return np.argmin(pairwise_distances(X, centers, metric=metric), axis=1)
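
assign_to_closest returns, for every row of X, the index of its nearest center, i.e. the assignment step of a k-means-style algorithm. A quick usage example with made-up points:

import numpy as np
from sklearn.metrics import pairwise_distances

def assign_to_closest(X, centers, metric='euclidean'):
    return np.argmin(pairwise_distances(X, centers, metric=metric), axis=1)

X = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.1]])
centers = np.array([[0.0, 0.0], [5.0, 5.0]])
print(assign_to_closest(X, centers))  # [0 0 1]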
gpu_profiling_magic.py (project: eucl_dist, author: droyed)
def sq_cdist(A,B): return pairwise_distances(A,B, 'sqeuclidean')

# Sets of inputs
cpu_profiling_magic.py (project: eucl_dist, author: droyed)
def sq_cdist(A,B): return pairwise_distances(A,B, 'sqeuclidean')

# Sets of input defining sizes
word2vec.py (project: tensorflow-playground, author: wangz10)
def sort(self, word):
        '''
        Use an input word to sort words using cosine distance in ascending order
        '''
        assert word in self.dictionary
        i = self.dictionary[word]
        vec = self.final_embeddings[i].reshape(1, -1)
        # Calculate pairwise cosine distance and flatten to 1-d
        pdist = pairwise_distances(self.final_embeddings, vec, metric='cosine').ravel()
        return [self.reverse_dictionary[i] for i in pdist.argsort()]
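
sort assumes final_embeddings, dictionary (word to row index) and reverse_dictionary (row index to word) were built during training. A small self-contained sketch of the same nearest-by-cosine lookup, using a toy two-dimensional vocabulary as a stand-in for the trained embeddings:

import numpy as np
from sklearn.metrics import pairwise_distances

# Toy stand-ins for the trained attributes (illustration only).
dictionary = {'king': 0, 'queen': 1, 'car': 2}
reverse_dictionary = {i: w for w, i in dictionary.items()}
final_embeddings = np.array([[0.9, 0.1],
                             [0.8, 0.2],
                             [0.1, 0.9]])

vec = final_embeddings[dictionary['king']].reshape(1, -1)
pdist = pairwise_distances(final_embeddings, vec, metric='cosine').ravel()
print([reverse_dictionary[i] for i in pdist.argsort()])  # nearest words first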

