def _spatial_sort(glyph):
    """Reorder the points of a glyph in-place along a greedy
    nearest-neighbour path, starting from the leftmost point."""
    from scipy.spatial.distance import cdist
    from numpy import argsort
    from numpy import argmin

    curr = argmin(glyph[:, 0])
    visited = set([curr])
    order = [curr]

    dd = cdist(glyph, glyph)

    while len(visited) < len(glyph):
        row = dd[curr, :]
        for i in argsort(row):
            if row[i] <= 0.0 or i == curr or i in visited:
                continue
            order.append(i)
            visited.add(i)
            break
        # walk on from the point just added (without this the loop would
        # keep measuring distances from the starting point)
        curr = i

    glyph[:, :] = glyph[order, :]
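A minimal usage sketch (my own illustration, not from the original project): sorting a small random point set so that consecutive rows end up spatially close.

import numpy as np

rng = np.random.default_rng(0)
pts = rng.random((10, 2))  # 10 random 2-D points
_spatial_sort(pts)         # reorders rows in-place
# consecutive rows are now near each other:
steps = np.linalg.norm(np.diff(pts, axis=0), axis=1)
print(steps)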
Python cdist() usage examples
def _center_mahalanobis(self, data):
    """
    Find the point closest to the center of the data under the
    Mahalanobis distance.

    Parameters
    ----------
    data : numpy array
        Input data.

    Returns
    -------
    numpy array
        The data point with the smallest summed Mahalanobis distance
        to all other points.
    """
    distances = cdist(data, data, metric='mahalanobis', VI=self._inv_covar_matrices)
    sum_distances = np.sum(distances, axis=0)
    center_idx = np.argmin(sum_distances)
    return data[center_idx]
def log_likelihood(self, data):
    nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
    n, d = data.shape
    log_likelihood = 0
    covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
    covar_matrix_det_v = np.linalg.det(covar_matrices)
    self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
    for k, nk in enumerate(nks):
        if self.verbose == 1:
            print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
        term_1 = nk * (np.log(float(nk) / n) - 0.5 * d * np.log(2 * np.pi)
                       - 0.5 * np.log(abs(covar_matrix_det_v[k])))
        cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]),
                             metric='mahalanobis', VI=self._inv_covar_matrices[k])
        cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  # drop NaNs returned by cdist
        term_2 = -0.5 * np.sum(cdist_no_nan)
        log_likelihood += term_1 + term_2
    if np.isnan(log_likelihood) or np.isinf(log_likelihood):
        raise ValueError('log-likelihood is NaN or infinite')
    return log_likelihood
def _assign_posterior(self):
    """Assign posterior to prior based on the Hungarian algorithm.

    Returns
    -------
    TFA
        Returns the instance itself.
    """
    prior_centers = self.get_centers(self.local_prior)
    posterior_centers = self.get_centers(self.local_posterior_)
    posterior_widths = self.get_widths(self.local_posterior_)
    # linear assignment on centers
    cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
    _, col_ind = linear_sum_assignment(cost)
    # reorder centers/widths based on the assignment
    self.set_centers(self.local_posterior_, posterior_centers[col_ind])
    self.set_widths(self.local_posterior_, posterior_widths[col_ind])
    return self
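To see what the Hungarian step above does in isolation, here is a self-contained sketch (the data is made up for illustration) matching two sets of cluster centers with scipy.optimize.linear_sum_assignment:

import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

prior = np.array([[0.0, 0.0], [5.0, 5.0], [0.0, 5.0]])
# the same centers, slightly perturbed and in a different order
posterior = np.array([[5.1, 4.9], [0.2, 5.1], [-0.1, 0.1]])

cost = cdist(prior, posterior, 'euclidean')     # pairwise matching cost
row_ind, col_ind = linear_sum_assignment(cost)  # optimal one-to-one matching
print(posterior[col_ind])  # posterior centers reordered to line up with prior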
leaflet-dask-delayed.py (project: MDAnalysis-with-Dask, author: Becksteinlab)
def Leaflet_finder(traj, i, j, ii, jj, len_chunks, cutoff):
    traj_path = os.path.abspath(os.path.normpath(os.path.join(os.getcwd(), traj)))
    g1 = np.load(traj_path, mmap_mode='r')[i:i + len_chunks]
    g2 = np.load(traj_path, mmap_mode='r')[j:j + len_chunks]

    # adjacency block: 1 where two atoms are within the cutoff
    block = np.zeros((len(g1), len(g2)), dtype=float)
    block[:, :] = cdist(g1, g2) <= cutoff
    adj_list = np.vstack(np.where(block))
    # shift local block indices to global atom indices
    adj_list[0] = adj_list[0] + ii * len_chunks
    adj_list[1] = adj_list[1] + jj * len_chunks
    if adj_list.shape[1] == 0:
        adj_list = np.zeros((2, 1))

    graph = nx.Graph()
    edges = [(adj_list[0, k], adj_list[1, k]) for k in range(adj_list.shape[1])]
    graph.add_edges_from(edges)
    # connected components, largest first
    leaflet = sorted(nx.connected_components(graph), key=len, reverse=True)
    l_connected = list(leaflet)  # Keep only connected components
    return l_connected
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
    id_0 = block_id[0]
    id_1 = block_id[1]
    # adjacency block: 1 where two atoms are within the cutoff
    block[:, :] = cdist(np.load(traj, mmap_mode='r')[id_0 * len_chunks:(id_0 + 1) * len_chunks],
                        np.load(traj, mmap_mode='r')[id_1 * len_chunks:(id_1 + 1) * len_chunks]) <= cutoff
    adj_list = np.vstack(np.where(block))
    # shift local block indices to global atom indices
    adj_list[0] = adj_list[0] + id_0 * len_chunks
    adj_list[1] = adj_list[1] + id_1 * len_chunks
    if adj_list.shape[1] == 0:
        adj_list = np.zeros((2, 1))

    graph = nx.Graph()
    edges = [(adj_list[0, k], adj_list[1, k]) for k in range(adj_list.shape[1])]
    graph.add_edges_from(edges)
    # wrap the components dict in an object array so dask can tile the blocks
    l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))},
                 dtype=object).reshape(1, 1)
    return l
def compute_document_similarity(X):
    '''
    From a matrix of unit-length document vectors, computes the cosine
    similarity, then maps it to an angular similarity in [0, 1]
    (whose complement is a proper metric).
    '''
    S = cdist(X, X, metric='cosine')
    S -= 1
    S *= -1            # cosine distance -> cosine similarity
    S[S > 1] = 1.0     # clip numerical noise outside [0, 1]
    S[S < 0] = 0.0
    # Set nan values to zero
    S[np.isnan(S)] = 0
    # Convert to angular similarity (its complement is a proper metric)
    S = 1 - (np.arccos(S) / np.pi)
    assert not np.isnan(S).any()
    assert not np.isinf(S).any()
    return S
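A quick sanity check (my own illustration): every document should be maximally similar to itself, and the matrix should be symmetric.

import numpy as np

rng = np.random.default_rng(1)
docs = rng.normal(size=(4, 16))
docs /= np.linalg.norm(docs, axis=1, keepdims=True)  # unit-normalize rows

S = compute_document_similarity(docs)
assert np.allclose(np.diag(S), 1.0)   # each document is identical to itself
assert np.allclose(S, S.T)            # similarity is symmetric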
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):
        if i == j:
            # within-cluster: pdist already returns each unique pair once
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take upper diagonal (+diagonal elements)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]
        dist[i, j] = dist[j, i] = d.mean()
    return dist
def coherence(vecs):
    """Mean pairwise cosine similarity of a list of vectors."""
    coh = 0.0
    counter = 0
    if len(vecs) > 1:
        matrix = np.array(vecs)
        dist_m = distance.cdist(matrix, matrix, 'cosine')
        for i in range(0, len(vecs) - 1):
            for j in range(i + 1, len(vecs)):
                cosine = 1 - dist_m[i][j]  # cosine distance -> similarity
                coh += cosine
                counter += 1
        coh = float(coh) / float(counter)
    else:
        coh = 1
    return coh
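A small illustration (my own toy data): near-parallel vectors score close to 1, orthogonal directions score 0.

import numpy as np
from scipy.spatial import distance

similar = [np.array([1.0, 0.0]), np.array([0.9, 0.1])]
mixed = [np.array([1.0, 0.0]), np.array([0.0, 1.0])]
print(coherence(similar))  # close to 1.0
print(coherence(mixed))    # 0.0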
###########################################
# Compute profile
###########################################
def update_atoms(S, b, H, a, Z, W, labels, weight=[1., 1.], max_iter=50):
    M = len(Z)
    M_h = [cdist(H[labels[m]].T, S.T, metric='euclidean') for m in range(M)]
    M_z = [cdist(Z[m].T, S.T, metric='euclidean') for m in range(M)]
    T_h = [algo3(a[labels[m]], b[m], M_h[m], max_iter=max_iter)[0] for m in range(M)]
    T_z = [algo3(W[m], b[m], M_z[m], max_iter=max_iter)[0] for m in range(M)]
    k = S.shape[1]
    for l in range(k):
        z_part = weight[0] * np.sum([(z * t[:, l]).sum(axis=1) for (z, t) in zip(Z, T_z)], axis=0)
        z_weight = weight[0] * np.sum([t[:, l].sum() for t in T_z])
        h_part = weight[1] * np.sum([(h * th[:, l]).sum(axis=1)
                                     for (h, th) in zip([H[labels[m]] for m in range(M)], T_h)], axis=0)
        h_weight = weight[1] * np.sum([th[:, l].sum() for th in T_h])
        # each atom is the transport-weighted average of the matched columns
        S[:, l] = (z_part + h_part) / (z_weight + h_weight)
#################### Learning functions for algorithms
## No constraint algorithm
## Initialization based on k-means
def compute_clients_dist(self, client_data):
    client_categorical_feats = [client_data.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
    client_continuous_feats = [client_data.get(specified_key) for specified_key in CONTINUOUS_FEATURES]

    # Compute the distances between the user and the cached continuous
    # and categorical features.
    cont_features = distance.cdist(self.continuous_features,
                                   np.array([client_continuous_feats]),
                                   'canberra')
    # The lambda trick is needed to prevent |cdist| from force-casting the
    # string features to double.
    cat_features = distance.cdist(self.categorical_features,
                                  np.array([client_categorical_feats]),
                                  lambda x, y: distance.hamming(x, y))
    # Take the product of similarities to attain a univariate similarity score.
    # Addition of 0.001 to the continuous features avoids a zero value from the
    # categorical variables, allowing categorical features precedence.
    return (cont_features + 0.001) * cat_features
def pairdist(ann1, ann2, which):
    """
    Compute the pairwise Euclidean distances between the contour
    boundary points of two annotations, and return the minimum,
    maximum, or average value.

    which: str
        One of 'min', 'max', or 'avg'.
    """
    dists = cdist(ann1.contours_to_matrix(0),
                  ann2.contours_to_matrix(0))
    if which == 'min':
        return dists.min()
    elif which == 'max':
        return dists.max()
    elif which == 'avg':
        return dists.mean()
    else:
        raise ValueError('invalid `which` value.')
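The annotation objects above come from the surrounding project; the distance logic itself only needs two point matrices. A standalone sketch with raw boundary-point arrays as stand-ins (my own example data):

import numpy as np
from scipy.spatial.distance import cdist

boundary_a = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]])
boundary_b = np.array([[3.0, 0.0], [3.0, 1.0]])

dists = cdist(boundary_a, boundary_b)
print(dists.min(), dists.max(), dists.mean())  # min/max/avg boundary separation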
def __init__(self, classData, k=1, distMetric='euclidean', *args, **kwargs):
    Classifier.__init__(self, util.colmat(classData[0]).shape[1],
                        len(classData))
    self.k = k

    minObs = min([len(cls) for cls in classData])
    if self.k > minObs:
        raise ValueError('k=%d exceeds the number of examples in the '
                         'smallest training class (%d).' % (k, minObs))

    if callable(distMetric):
        self.distFunc = lambda x1, x2: distMetric(x1, x2, *args, **kwargs)
    else:
        self.distFunc = lambda x1, x2: spdist.cdist(x1, x2, metric=distMetric)

    self.trainData = classData
def modified_hausdorf_distance(itemA, itemB):
    # Modified Hausdorff Distance
    #
    # Input
    #  itemA : [n x 2] coordinates of black pixels
    #  itemB : [m x 2] coordinates of black pixels
    #
    # M.-P. Dubuisson, A. K. Jain (1994). A modified Hausdorff distance for object matching.
    # International Conference on Pattern Recognition, pp. 566-568.
    #
    D = cdist(itemA, itemB)
    mindist_A = D.min(axis=1)  # for each point in A, distance to nearest point in B
    mindist_B = D.min(axis=0)  # for each point in B, distance to nearest point in A
    mean_A = np.mean(mindist_A)
    mean_B = np.mean(mindist_B)
    return max(mean_A, mean_B)
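A quick check of the metric's behaviour (my own toy data): shifting one point set increases the distance by roughly the size of the shift.

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(2)
A = rng.random((50, 2))
B = A + np.array([3.0, 0.0])   # same shape, shifted 3 units right

print(modified_hausdorf_distance(A, A))  # 0.0 for identical sets
print(modified_hausdorf_distance(A, B))  # approximately 3.0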
def l2norm_(X, Xstar):
    """
    Wrapper function to compute the L2 norm.

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape=((m, nfeatures))
        Instances.

    Returns
    -------
    np.ndarray
        Pairwise Euclidean distance between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar)
def kronDelta(X, Xstar):
    """
    Computes the Kronecker delta for rows in X and Xstar.

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape=((m, nfeatures))
        Instances.

    Returns
    -------
    np.ndarray
        Kronecker delta between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar) < np.finfo(np.float32).eps
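Illustration (my own): rows that coincide exactly get a 1, everything else a 0.

import numpy as np
from scipy.spatial.distance import cdist

X = np.array([[0.0, 0.0], [1.0, 2.0]])
Xstar = np.array([[1.0, 2.0], [5.0, 5.0]])

print(kronDelta(X, Xstar).astype(int))
# [[0 0]
#  [1 0]]  -- only X[1] matches Xstar[0]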
def get_knn_score_for(tree, k=5):
    tree = tree_copy_with_start(tree)
    tree_encoding = encoder.get_encoding([None, tree])  # This makes sure that token-based things fail
    tree_str_rep = str(tree)
    distances = cdist(np.atleast_2d(tree_encoding), encodings, 'cosine')
    knns = np.argsort(distances)[0]
    num_non_identical_nns = 0
    sum_equiv_nns = 0
    current_i = 0
    # walk the neighbour list until k non-identical neighbours have been seen
    # (or the equivalence class runs out of other members)
    while (num_non_identical_nns < k and current_i < len(knns)
           and eq_class_counts[tree.symbol] - 1 > num_non_identical_nns):
        expression_idx = knns[current_i]
        current_i += 1
        if (eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol
                and str(expression_data[expression_idx]['tree']) == tree_str_rep):
            continue  # This is an identical expression, move on
        num_non_identical_nns += 1
        if eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol:
            sum_equiv_nns += 1
    return "(%s-nn-stat: %s)" % (k, sum_equiv_nns / k)
def kernel(self, X, Y=None):
    GenericTests.check_type(X, 'X', np.ndarray, 2)
    # if X=Y, use the more efficient pdist call, which exploits symmetry
    if Y is None:
        dists = squareform(pdist(X, 'euclidean'))
    else:
        GenericTests.check_type(Y, 'Y', np.ndarray, 2)
        assert shape(X)[1] == shape(Y)[1]
        dists = cdist(X, Y, 'euclidean')
    if self.nu == 0.5:
        # for nu=1/2, the Matern class corresponds to the Ornstein-Uhlenbeck process
        K = (self.sigma**2.) * exp(-dists / self.width)
    elif self.nu == 1.5:
        K = (self.sigma**2.) * (1 + sqrt(3.) * dists / self.width) * exp(-sqrt(3.) * dists / self.width)
    elif self.nu == 2.5:
        K = (self.sigma**2.) * (1 + sqrt(5.) * dists / self.width
                                + 5.0 * (dists**2.) / (3.0 * self.width**2.)) * exp(-sqrt(5.) * dists / self.width)
    else:
        raise NotImplementedError()
    return K
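The same Matern-3/2 formula as a standalone function, for trying it outside the class (a sketch under the assumption that sigma and width are plain floats):

import numpy as np
from scipy.spatial.distance import cdist

def matern32(X, Y, sigma=1.0, width=1.0):
    """Matern kernel, nu=3/2: k(r) = sigma^2 (1 + sqrt(3) r / w) exp(-sqrt(3) r / w)."""
    r = cdist(X, Y, 'euclidean')
    scaled = np.sqrt(3.0) * r / width
    return sigma**2 * (1.0 + scaled) * np.exp(-scaled)

X = np.linspace(0, 1, 5).reshape(-1, 1)
K = matern32(X, X)
print(np.diag(K))  # ones on the diagonal: k(0) = sigma^2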
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""
    centroids = [calc_centroid(arom_coords) for arom_coords in [arom_a_coords, arom_b_coords]]
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i == 0 else 0
        norm = calc_facing_vector(arom_norm + centroids[i], centroids[j])
        # norm_up = arom_norm
        # norm_down = -1 * arom_norm
        # # get the norm so that it points to the other ring
        # d_up = euclidean(norm_up + centroids[i], centroids[j])
        # d_down = cdist(norm_down + centroids[i], centroids[j])
        # norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)
    return face_norms
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""
    centroids = calc_centroids(arom_a_coords, arom_b_coords)
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i == 0 else 0
        norm_up = arom_norm
        norm_down = -1 * arom_norm
        # choose the norm that points towards the other ring
        d_up = cdist([norm_up + centroids[i]], [centroids[j]])
        d_down = cdist([norm_down + centroids[i]], [centroids[j]])
        norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)
    return face_norms
def min_l1_dist(m1, m2):
    assert len(m1) == len(m2)
    # pairwise L1 distances, for both sign conventions of m2
    dist1 = cdist(m1, m2, 'minkowski', p=1)
    dist2 = cdist(m1, -m2, 'minkowski', p=1)
    neg = list(zip(*np.where(dist1 > dist2)))  # pairs where the negated row is closer
    dist = np.minimum(dist1, dist2)
    # minimum-cost perfect matching between the rows of m1 and m2
    m = Munkres()
    matching = m.compute(dist.copy())
    total = 0.0
    for row, column in matching:
        total += dist[row][column]
    return total, matching, neg
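Toy usage (my own data, assuming the munkres package is installed): m2 is m1 with rows permuted and one row sign-flipped; the matching recovers the permutation at zero cost.

import numpy as np
from munkres import Munkres  # dependency of min_l1_dist above
from scipy.spatial.distance import cdist

m1 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
m2 = np.array([[5.0, 6.0], [-1.0, -2.0], [3.0, 4.0]])  # permuted, row 0 of m1 negated

total, matching, neg = min_l1_dist(m1, m2)
print(total)     # 0.0: every row has an exact (possibly sign-flipped) partner
print(matching)  # expected: [(0, 1), (1, 2), (2, 0)]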
def _nneighbor(files):
    import glob
    import os
    import h5py as _h5py
    import numpy as np
    from scipy.spatial import distance
    paths = glob.glob(files)
    if paths:
        from . import io, postprocess
        for path in paths:
            print('Loading {} ...'.format(path))
            with _h5py.File(path, 'r') as locs_file:
                locs = locs_file['clusters'][...]
            clusters = np.rec.array(locs, dtype=locs.dtype)
            points = np.array(clusters[['com_x', 'com_y']].tolist())
            # nearest-neighbor distance for each cluster center
            alldist = distance.cdist(points, points)
            alldist[alldist == 0] = float('inf')  # ignore self-distances
            minvals = np.amin(alldist, axis=0)
            base, ext = os.path.splitext(path)
            out_path = base + '_minval.txt'
            np.savetxt(out_path, minvals, newline='\r\n')
            print('Saved file to: {}'.format(out_path))
def _assign(self, X, update_class_attributes=True):
    if self.metric == "euclidean":
        dists = cdist(X.reshape((X.shape[0], -1)),
                      self.cluster_centers_.reshape((self.n_clusters, -1)),
                      metric="euclidean")
    elif self.metric == "dtw":
        dists = cdist_dtw(X, self.cluster_centers_)
    elif self.metric == "softdtw":
        dists = cdist_soft_dtw(X, self.cluster_centers_, gamma=self.gamma_sdtw)
    else:
        raise ValueError("Incorrect metric: %s (should be one of 'dtw', 'softdtw', 'euclidean')" % self.metric)
    matched_labels = dists.argmin(axis=1)
    if update_class_attributes:
        self.labels_ = matched_labels
        _check_no_empty_cluster(self.labels_, self.n_clusters)
        if self.dtw_inertia and self.metric != "dtw":
            inertia_dists = cdist_dtw(X, self.cluster_centers_)
        else:
            inertia_dists = dists
        self.inertia_ = _compute_inertia(inertia_dists, self.labels_, self._squared_inertia)
    return matched_labels
def _distance_matrix(self, X1, X2):
    """
    This private method returns the following matrix:
    :math:`\\boldsymbol{D_{ij}} = \\varphi(\\| \\boldsymbol{x_i} -
    \\boldsymbol{y_j} \\|)`

    :param numpy.ndarray X1: the vector x in the formula above.
    :param numpy.ndarray X2: the vector y in the formula above.
    :return: matrix: the matrix D.
    :rtype: numpy.ndarray
    """
    matrix = cdist(
        X1, X2, lambda x, y: self.basis(x - y, self.parameters.radius)
    )
    return matrix
def estimate_ls(self, X):
    Ntrain = X.shape[0]
    if Ntrain < 10000:
        X1 = np.copy(X)
    else:
        # subsample large training sets before the O(N^2) distance computation
        randind = np.random.permutation(Ntrain)
        X1 = X[randind[0:(5 * self.no_pseudos[0])], :]
    D = X1.shape[1]
    N = X1.shape[0]
    triu_ind = np.triu_indices(N)
    ls = np.zeros((D,))
    for i in range(D):
        # per-dimension lengthscale: log of the median pairwise distance
        X1i = np.reshape(X1[:, i], (N, 1))
        d2i = cdist(X1i, X1i, 'euclidean')
        d2imed = np.median(d2i[triu_ind])
        ls[i] = np.log(d2imed + 1e-16)
    return ls
def estimate_ls_temp(self, X):
    Ntrain = X.shape[0]
    if Ntrain < 10000:
        X1 = np.copy(X)
    else:
        # subsample large training sets before the O(N^2) distance computation
        randind = np.random.permutation(Ntrain)
        X1 = X[randind[0:(5 * self.no_pseudos[0])], :]
    dist = cdist(X1, X1, 'euclidean')
    D = X1.shape[1]
    N = X1.shape[0]
    triu_ind = np.triu_indices(N)
    ls = np.zeros((D,))
    # one shared lengthscale: log of the median pairwise distance over all dimensions
    d2imed = np.median(dist[triu_ind])
    for i in range(D):
        ls[i] = np.log(d2imed + 1e-16)
    return ls
def avg_within_ss(X, k):
    """
    Compute the average within-cluster sum of squares. The code here can be
    found "almost" anywhere online.

    Params:
    --------
    X: numpy array with observations and features to be clustered
    k: number of clusters

    Returns:
    --------
    avgwithinss: average within-cluster sum of squares
    """
    model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=50,
                            n_init=3, max_no_improvement=10, verbose=0)
    model.fit(X)
    centroids = model.cluster_centers_
    # distance of every observation to its nearest centroid
    dist_c = cdist(X, centroids, 'euclidean')
    dist = np.min(dist_c, axis=1)
    avgwithinss = sum(dist**2) / X.shape[0]
    return avgwithinss
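This function is typically used in an elbow plot. A minimal sketch (my own, assuming scikit-learn is available):

import numpy as np
from sklearn.cluster import MiniBatchKMeans
from scipy.spatial.distance import cdist

rng = np.random.default_rng(3)
# two well-separated blobs
X = np.vstack([rng.normal(0, 0.5, (100, 2)), rng.normal(5, 0.5, (100, 2))])

for k in range(1, 6):
    print(k, avg_within_ss(X, k))  # the drop flattens sharply after k=2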
def EvalSHD(data, model):
    dataNum, dataDim = data.shape
    distToAnchors = cdist(model["anchorData"], data, 'euclidean')
    sigmaRBF = model["sigmaRBF"]
    phi = npy.exp(-npy.square(distToAnchors) / sigmaRBF / sigmaRBF / 2)
    matP = model["matProj"]
    projData = npy.dot(matP.T, phi)
    # binarize the projections into a {-1, +1} code
    code = npy.ones(projData.shape, dtype=npy.int32)
    code[projData < 0] = -1
    # integer division so the byte count stays an int under Python 3
    if 0 == code.shape[0] % 8:
        codeByteNum = code.shape[0] // 8
    else:
        codeByteNum = 1 + code.shape[0] // 8
    # pack 8 bits per byte into a compact binary code
    compactCode = npy.zeros((code.shape[1], codeByteNum), dtype=npy.uint8)
    for kk in range(code.shape[0]):
        idxByte = kk // 8
        idxBit = kk % 8
        compactCode[code[kk, :] == 1, idxByte] += (1 << idxBit)
    return compactCode
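The packing loop above can be cross-checked against NumPy's built-in bit packer; a small comparison of my own (assuming NumPy >= 1.17 for the bitorder argument):

import numpy as np

rng = np.random.default_rng(4)
code = np.where(rng.random((12, 3)) < 0.5, -1, 1)  # 12-bit codes for 3 samples

# manual packing as in EvalSHD: bit kk of a sample lands in byte kk // 8
n_bytes = (code.shape[0] + 7) // 8
manual = np.zeros((code.shape[1], n_bytes), dtype=np.uint8)
for kk in range(code.shape[0]):
    manual[code[kk, :] == 1, kk // 8] += (1 << (kk % 8))

packed = np.packbits((code == 1).astype(np.uint8).T, axis=1, bitorder='little')
assert np.array_equal(manual, packed)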
def recommendNext(self, user_id, current_loc):
    # indices should start from zero
    indices = user_id - 1
    # U x Z
    Theta = self.Theta[indices]
    # Z x I
    Phi = self.Phi
    beta = self.beta
    # U x I
    dists = np.exp(-0.5 * beta * np.square(cdist(current_loc, self.loc_info)))
    # Equation (8)
    # Z x U x I
    P_izu = np.expand_dims(np.exp(Phi), axis=1) * np.expand_dims(dists, axis=0)
    P_izu = P_izu / np.expand_dims(np.sum(P_izu, axis=2), axis=2)
    # Equation (9)
    # U x I
    P_iu = np.expand_dims(Theta.T, axis=2) * P_izu
    P_iu = np.sum(P_iu, axis=0)
    return P_iu