python类pdist()的实例源码

recipe_clustering.py 文件源码 项目:Flavor-Network 作者: lingcheng99 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def plot_bokeh(df, sublist, filename):
    """Plot a 2-D t-SNE embedding of the recipes of selected cuisines with bokeh.

    Rows of ``df`` whose ``'cuisine'`` equals one of the entries of ``sublist``
    are stacked cuisine by cuisine, pairwise cosine distances are computed on
    all columns except ``'cuisine'`` and ``'recipeName'``, and the resulting
    distance matrix is embedded with ``TSNE(metric='precomputed')``.

    Parameters
    ----------
    df : DataFrame with ``'cuisine'`` and ``'recipeName'`` columns; the
        remaining columns are used as features.
    sublist : non-empty sequence of cuisine names to plot.
    filename : path handed to bokeh's ``output_file``.

    Returns
    -------
    None.  The figure is shown via ``show(p)``.
    """
    # lenlist[k] is the cumulative number of rows after the first k cuisines,
    # so cuisine k occupies rows lenlist[k]:lenlist[k + 1] of df_sub.
    lenlist = [0]
    df_sub = df[df['cuisine'] == sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine'] == cuisine]
        df_sub = pd.concat([df_sub, temp], axis=0, ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine', 'recipeName'], axis=1)
    # Single pre-formatted argument: prints the same text under Python 2 and 3.
    print("%s %s" % (df_X.shape, lenlist))

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)
    # cannot use seaborn palette for bokeh
    palette = ['red', 'green', 'blue', 'yellow']
    colors = []
    for i in range(len(sublist)):
        # Cycle through the palette so more than len(palette) cuisines do not
        # raise IndexError (colours then repeat).
        colors.extend([palette[i % len(palette)]] * (lenlist[i + 1] - lenlist[i]))
    # plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
            data=dict(x=tsne[:, 0], y=tsne[:, 1],
                      cuisine=df_sub['cuisine'],
                      recipe=df_sub['recipeName']))

    hover = HoverTool(tooltips=[
                ("cuisine", "@cuisine"),
                ("recipe", "@recipe")])

    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")

    p.circle('x', 'y', size=10, source=source, fill_color=colors)

    show(p)
persistent_homology.py 文件源码 项目:OpenTDA 作者: outlace 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def buildGraph(data, epsilon=1., metric='euclidean', p=2):
    """Build the epsilon-neighbourhood graph of a point cloud.

    Pairwise distances are computed with ``pdist``; every distance that is
    ``>= epsilon`` is zeroed before the matrix is handed to ``nx.Graph``.

    Parameters
    ----------
    data : array-like of observations (one row per point).
    epsilon : distance threshold; pairs at distance >= epsilon are zeroed out.
    metric : metric name or callable understood by ``pdist``.
    p : order of the Minkowski norm.  Only forwarded for Minkowski-type
        string metrics and for callables.

    Returns
    -------
    (nodes, edges, weights) : the graph's node view, the edges as a list of
        2-element sets, and the matching list of edge weights.
    """
    # Recent SciPy rejects keyword arguments a metric does not understand, so
    # passing p together with e.g. 'euclidean' raises TypeError.  Callables
    # keep receiving p exactly as before.
    takes_p = callable(metric) or str(metric).lower() in (
        'minkowski', 'mi', 'm', 'pnorm', 'wminkowski')
    if takes_p:
        condensed = pdist(data, metric=metric, p=p)
    else:
        condensed = pdist(data, metric=metric)
    D = squareform(condensed)
    D[D >= epsilon] = 0.
    G = nx.Graph(D)
    edges = list(map(set, G.edges()))
    weights = [G.get_edge_data(u, v)['weight'] for u, v in G.edges()]
    return G.nodes(), edges, weights
restriction.py 文件源码 项目:ababe 作者: unkcpz 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def is_satisfied(self, gcell):
    """Tell whether the target element's atoms are all far enough apart.

    ``gcell`` is expanded with a 2x2x2 diagonal supercell matrix, the
    Cartesian coordinates of ``self.target_ele`` are fetched from that
    supercell, and the smallest pairwise distance between them is compared
    against ``self.target_dist``.

    Returns a truthy value when the minimum distance is strictly greater
    than ``self.target_dist``.
    """
    doubling = np.diag([2, 2, 2])
    # Array of the target element's Cartesian coordinates in the supercell.
    coords = gcell.supercell(doubling).get_cartesian(ele=self.target_ele)
    closest = np.min(pdist(coords))
    return closest > self.target_dist
bkheatmap.py 文件源码 项目:bkheatmap 作者: wwliao 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def cluster(df, metric="euclidean", method="single", row=True, column=True):
    """Reorder a data frame by hierarchical clustering of rows and/or columns.

    Parameters
    ----------
    df : data frame to reorder.
    metric : distance metric passed to ``pdist``.
    method : linkage method passed to ``linkage``.
    row : cluster and reorder the rows when true.
    column : cluster and reorder the columns when true (done after the rows).

    Returns
    -------
    (df, row_linkmat, col_linkmat) : the reordered frame and the linkage
        matrices; a linkage matrix is ``None`` for an axis not clustered.
    """
    def _order_rows(frame):
        # Cluster the rows of `frame` and return them in dendrogram leaf order.
        linkmat = hier.linkage(dist.pdist(frame, metric), method)
        return frame.iloc[hier.leaves_list(linkmat), :], linkmat

    row_linkmat = col_linkmat = None
    if row:
        df, row_linkmat = _order_rows(df)
    if column:
        # Columns are handled as the rows of the transpose.
        transposed, col_linkmat = _order_rows(df.T)
        df = transposed.T
    return df, row_linkmat, col_linkmat
transformation_tests_func.py 文件源码 项目:3D_Dense_Transformer_Networks 作者: JohnYC1995 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def makeT(self, cp):
    """Assemble the (K+4) x (K+4) matrix T for K three-dimensional control points.

    cp: [(k*k*k) x 3] control points
    T:  [((k*k*k)+4) x ((k*k*k)+4)]

    The first K rows hold a column of ones, the control points and the radial
    term r^2 * ln(r^2) of the pairwise distances; the last four rows hold a
    row of ones and the transposed control points.
    """
    n_ctrl = cp.shape[0]
    T = np.zeros((n_ctrl + 4, n_ctrl + 4))
    T[:n_ctrl, 0] = 1
    T[:n_ctrl, 1:4] = cp
    T[n_ctrl, 4:] = 1
    T[n_ctrl + 1:, 4:] = cp.T

    pairwise = squareform(pdist(cp, metric='euclidean'))
    sq = pairwise * pairwise
    # Zero squared distances are replaced by 1 so that sq * ln(sq) evaluates to 0.
    sq = np.where(sq == 0, 1, sq)
    radial = sq * np.log(sq)
    np.fill_diagonal(radial, 0)
    T[:n_ctrl, 4:] = radial
    return T
dl_simulation.py 文件源码 项目:CS-SMAF 作者: brian-cleary 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def coherence(U,m):
    """Return |1 - cosine distance| for every pair of columns of ``Phi.dot(U)``.

    ``Phi`` is obtained from ``random_phi(m, U.shape[0])`` (presumably a random
    m x U.shape[0] matrix -- confirm against ``random_phi``).  The result
    is the condensed vector produced by ``pdist``, one entry per column pair.
    """
    projected = random_phi(m, U.shape[0]).dot(U)
    # pdist works on rows, so transpose to compare the columns.
    cosine_dists = distance.pdist(projected.T, 'cosine')
    return abs(1 - cosine_dists)
analyze_predictions.py 文件源码 项目:CS-SMAF 作者: brian-cleary 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def compare_distances(A, B, random_samples=None, s=200, pvalues=False):
    """Correlate the pairwise column distances of two matrices.

    Parameters
    ----------
    A, B : 2-D arrays with the same number of columns.
    random_samples : optional index or boolean mask selecting the columns to
        compare.  When ``None`` or empty, ``min(s, A.shape[1])`` columns are
        picked at random.
    s : number of columns to sample when ``random_samples`` is not given.
    pvalues : when true, return the full ``pearsonr`` / ``spearmanr`` results
        instead of only the correlation coefficients.

    Returns
    -------
    (pearson, spearman) : the coefficients, or the full result objects when
        ``pvalues`` is true.
    """
    if random_samples is None or len(random_samples) == 0:
        # Builtin bool: the np.bool alias was removed from NumPy.
        random_samples = np.zeros(A.shape[1], dtype=bool)
        random_samples[:min(s, A.shape[1])] = True
        np.random.shuffle(random_samples)
    # pdist works on rows, so transpose to compare the selected columns.
    dist_x = distance.pdist(A[:, random_samples].T, 'euclidean')
    dist_y = distance.pdist(B[:, random_samples].T, 'euclidean')
    pear = pearsonr(dist_x, dist_y)
    spear = spearmanr(dist_x, dist_y)
    if pvalues:
        return pear, spear
    return pear[0], spear[0]
stats.py 文件源码 项目:gpam_stats 作者: ricoms 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def n1_fraction_borderline(data):
    """Fraction of points lying on a class boundary of the minimum spanning tree.

    The last column of ``data`` holds the class label; all other columns are
    features.  A minimum spanning tree is built on the Euclidean distance
    matrix, and a point counts as "borderline" when it is an endpoint of a
    tree edge whose two endpoints carry different labels.

    Parameters
    ----------
    data : DataFrame whose last column is the class label.

    Returns
    -------
    float : number of borderline points divided by ``len(data)``.
    """
    # Work positionally: the tree's indices are row positions, not index
    # labels, so a label-based lookup fails on a non-default index.
    y = data.iloc[:, -1].values
    dist = pdist(data.iloc[:, :-1], 'euclidean')
    sparse_matrix = csr_matrix(squareform(dist))

    # The tree is deterministic, so it is computed once; averaging repeated
    # identical rounds would give the same value.
    tree = minimum_spanning_tree(sparse_matrix)
    rows, cols = tree.nonzero()
    crossing = y[rows] != y[cols]
    borders = set(rows[crossing]) | set(cols[crossing])

    return (1.0 * len(borders)) / len(data)
stats.py 文件源码 项目:gpam_stats 作者: ricoms 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def n2_ratio_intra_extra_class_nearest_neighbor_distance(data):
    """Ratio of summed intra-class to summed extra-class nearest-neighbour distances.

    The last column of ``data`` holds the class label; all other columns are
    features.  For every point the Euclidean distance to its nearest
    neighbour of the same class and to its nearest neighbour of any other
    class is taken; the two sums over all points are then divided.

    Parameters
    ----------
    data : DataFrame whose last column is the class label.

    Returns
    -------
    float : ``sum(intra) / sum(inter)``.  A zero extra-class sum is replaced
        by 1 to avoid a division by zero.  The result is NaN when some point
        has no same-class or no other-class neighbour.
    """
    # Work positionally: the distance matrix is addressed by row position, so
    # mixing in index labels breaks on a non-default index.
    labels = data.iloc[:, -1].values
    dist_matrix = squareform(pdist(data.iloc[:, :-1], 'euclidean'))
    nan = float('nan')

    intra_min = 0
    inter_min = 0
    for i in range(len(labels)):
        same_class = labels == labels[i]
        other_class = ~same_class
        same_class[i] = False  # a point is not its own neighbour
        to_point = dist_matrix[:, i]
        # An empty neighbour set contributes NaN.
        intra_min = intra_min + (to_point[same_class].min() if same_class.any() else nan)
        inter_min = inter_min + (to_point[other_class].min() if other_class.any() else nan)

    # handle the inter_min == 0 case
    if inter_min == 0:
        inter_min = 1

    return (1.0 * intra_min) / (1.0 * inter_min)
cmaes.py 文件源码 项目:bolero 作者: rock-learning 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def is_behavior_learning_done(self):
    """Check if the optimization is finished.

    The optimization stops when any of the following holds once more than
    ``n_samples_per_update`` iterations have run: the fitness values or the
    search-distribution parameters contain infs/nans, the scaled maximum
    variance drops to ``min_variance`` or below, the spread of the fitness
    values falls below ``min_fitness_dist``, or the ratio of the largest to
    the smallest diagonal covariance entry exceeds ``max_condition``.

    Returns
    -------
    finished : bool
        Is the learning of a behavior finished?
    """
    if self.it <= self.n_samples_per_update:
        return False

    if not np.all(np.isfinite(self.fitness)):
        return True

    # Check for invalid values
    if not (np.all(np.isfinite(self.invsqrtC)) and
            np.all(np.isfinite(self.cov)) and
            np.all(np.isfinite(self.mean)) and
            np.isfinite(self.var)):
        # The message has no placeholder, so it must not be %-formatted.
        self.logger.info("Stopping: infs or nans")
        return True

    if (self.min_variance is not None and
            np.max(np.diag(self.cov)) * self.var <= self.min_variance):
        self.logger.info("Stopping: %g < min_variance", self.var)
        return True

    # Largest absolute difference between any two fitness values.
    max_dist = np.max(pdist(self.fitness[:, np.newaxis]))
    if max_dist < self.min_fitness_dist:
        self.logger.info("Stopping: %g < min_fitness_dist", max_dist)
        return True

    cov_diag = np.diag(self.cov)
    if (self.max_condition is not None and
            np.max(cov_diag) > self.max_condition * np.min(cov_diag)):
        # Report the diagonal extrema, i.e. the values actually compared.
        self.logger.info("Stopping: %g / %g > max_condition",
                         np.max(cov_diag), np.min(cov_diag))
        return True

    return False
precluster.py 文件源码 项目:texta 作者: texta-tk 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __call__(self):
    """Cluster ``self.words`` hierarchically by their ``self.vectors``.

    Returns an empty list when there are no words or no vectors.  Otherwise
    pairwise distances (``self.metric``) are linked with ``self.linkage``,
    the linkage matrix is turned into a dendrogram, candidate clusterings are
    derived from it, and the clustering chosen by
    ``self._find_optimal_clustering`` is returned as a list of clusters,
    each a list of ``(label, vector)`` tuples.
    """
    if not len(self.words) or not len(self.vectors):
        return []

    condensed = scidist.pdist(np.array(self.vectors), self.metric)
    linkage_matrix = hier.linkage(condensed, self.linkage)

    dendrogram = self._linkage_matrix_to_dendrogram(linkage_matrix, self.words, self.vectors)
    clusterings = self._create_clusterings(dendrogram)

    result = []
    for cluster in self._find_optimal_clustering(clusterings):
        members = [(node.label, node.vector) for node in _get_cluster_nodes(cluster)]
        result.append(members)
    return result
cnn.py 文件源码 项目:neuroevolution 作者: cosmoharrigan 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def calculate_fitness(feature_vectors):
    """Score a set of feature vectors by how spread out they are.

    The fitness is the mean plus the minimum of all pairwise Euclidean
    distances between the rows of ``feature_vectors``.
    """
    spread = distance.pdist(feature_vectors, 'euclidean')
    average_gap = spread.mean()
    smallest_gap = spread.min()
    return average_gap + smallest_gap
candidates.py 文件源码 项目:luna16 作者: gzuidhof 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    """Merge candidate points that are chained together within ``distance``.

    Candidates whose Euclidean distance is at most ``distance`` are linked;
    every connected component of the resulting graph is replaced by the mean
    of its points.

    Parameters
    ----------
    candidates : array of candidate coordinates, one row per candidate (the
        merged centers are stored in three columns).
    seriesuid : identifier written into the first column of every output row.
    distance : linking threshold.

    Returns
    -------
    DataFrame with columns ``CANDIDATES_COLUMNS``: one row
    ``(seriesuid, x, y, z, 0)`` per merged candidate.
    """
    distances = pdist(candidates, metric='euclidean')

    # Determine nodes within distance, replace by 1 (=adjacency matrix)
    adjacency_matrix = np.where(squareform(distances) <= distance, 1, 0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n, 3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        new_candidates[cluster_i, :] = np.mean(candidates[labels == cluster_i], axis=0)

    # Build a real list of rows: on Python 3 ``zip`` is a lazy iterator, which
    # older pandas versions refuse as DataFrame input.
    rows = [(seriesuid, x, y, z, 0) for x, y, z in new_candidates]

    return pd.DataFrame(rows, columns=CANDIDATES_COLUMNS)
landmarks.py 文件源码 项目:lddmm-ot 作者: jeanfeydy 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def precompute_kernels(self, q) :
    """
    Returns a tuple of kernel, kernel', kernel'' matrices at position q.

    q is reshaped to (npoints, dimension); the kernel is the Gaussian
    exp(-|x_i - x_j|^2 / (2 * kernel_scale^2)) of the squared pairwise
    distances, and the other two matrices are that kernel scaled by
    -1 / (2 * kernel_scale^2) and 1 / (4 * kernel_scale^4).
    """
    points = q.reshape((self.npoints, self.dimension))
    sq_dists = squareform(pdist(points, 'sqeuclidean'))
    two_scale_sq = 2* self.kernel_scale ** 2
    kernel = exp(- sq_dists / two_scale_sq)
    first_derivative = - kernel / two_scale_sq
    second_derivative = kernel / (4* self.kernel_scale ** 4)
    return (kernel, first_derivative, second_derivative)
landmarks.py 文件源码 项目:lddmm-ot 作者: jeanfeydy 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def dq_Kqp_a(self,q,p,a, kernels) :
        """
        Useful for the adjoint integration scheme.
        d_q (K_q p) . a  = ...

        Evaluated numerically as the central finite difference of
        Landmarks.K along direction a with step 1e-8.  The `kernels`
        argument is not used by the active code path: kernels are
        recomputed at the two shifted positions.  The string literal after
        the return statement is an unreachable, disabled analytical variant.
        """
        step = 1e-8
        q_plus = q + step*a
        q_minus = q - step*a
        forward = Landmarks.K(self, q_plus, p, Landmarks.precompute_kernels(self, q_plus))
        backward = Landmarks.K(self, q_minus, p, Landmarks.precompute_kernels(self, q_minus))
        return (forward - backward) / (2*step)

        """x = q.reshape((self.npoints, self.dimension))
        p = p.reshape((self.npoints, self.dimension))
        a = a.reshape((self.npoints, self.dimension))
        dists = squareform(pdist(x, 'sqeuclidean')) # dists_ij       = |x_i-x_j|^2
        # We have :
        # [K_q p]_nd = sum_j { k(|x_n - x_j|^2) * p_j^d }
        #
        # So that :
        # grad_nd = a_nd * sum_j { 2 * (x_n^d - x_j^d) * k'(|x_n - x_j|^2) * p_j^d }
        grad = zeros((self.npoints, self.dimension))
        for d in range(self.dimension) :
            diffs = atleast_2d(x[:,d]).T - x[:,d]  # diffs_ij = x_i^d - x_j^d

            # K_ij = 2 * (x_i^d - x_j^d) * k'(|x_i - x_j|^2) * p_j^d
            K = 2 * dists * kernels[1] * p[:,d]
            # grad_nd =   a_nd * sum_j { 2 * (x_n^d - x_j^d) * k'(|x_n - x_j|^2) * p_j^d }
            grad[:,d] = a[:,d] * sum( K , 1 )
        return grad.reshape((self.npoints * self.dimension,))"""
ppdb_utils.py 文件源码 项目:Learning-sentence-representation-with-guidance-of-human-attention 作者: wangshaonan 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def getPairsFast(d, type):
    """Pick a partner phrase for both members of every pair in ``d``.

    All phrases are flattened into one list (two consecutive entries per
    pair) and their pairwise cosine distances are computed from their
    ``representation`` attributes.  The distance of a phrase to itself and
    to its own pair partner is set to 1 before the row-wise nearest phrase
    is looked up.

    ``type`` selects the strategy:
      * "MAX"  -- the nearest phrase found above,
      * "RAND" -- ``getPairRand(d, i)``,
      * "MIX"  -- ``getPairMixScore(d, i, nearest)``.
    Any other value yields ``(None, None)`` entries.

    Returns a list with one ``(p1, p2)`` tuple per input pair.
    """
    phrases = []
    for first, second in d:
        phrases.extend((first, second))
    vectors = [phrase.representation for phrase in phrases]

    cos_dist = squareform(pdist(vectors, 'cosine'))

    # Phrases 2k and 2k+1 belong to the same pair, so the partner of index i
    # is i with its lowest bit flipped.
    idx = np.arange(len(cos_dist))
    cos_dist[idx, idx] = 1
    cos_dist[idx, idx ^ 1] = 1

    nearest = np.argmin(cos_dist, axis=1)

    pairs = []
    for i in range(len(d)):
        left, right = nearest[2*i], nearest[2*i+1]
        if type == "MAX":
            chosen = (phrases[left], phrases[right])
        elif type == "RAND":
            chosen = (getPairRand(d,i), getPairRand(d,i))
        elif type == "MIX":
            chosen = (getPairMixScore(d,i,phrases[left]),
                      getPairMixScore(d,i,phrases[right]))
        else:
            chosen = (None, None)
        pairs.append(chosen)
    return pairs
lda_tuna.py 文件源码 项目:twitter_LDA_topic_modeling 作者: kenneth-orton 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def cao_juan_2009(topic_term_dists, num_topics):
    """Sum of pairwise cosine distances between topics, per topic pair.

    The sum runs over the full square distance matrix and is divided by the
    number of unordered topic pairs, ``num_topics * (num_topics - 1) / 2``.

    NOTE(review): the square matrix contains every pair twice, so the result
    is twice the mean pairwise distance -- confirm this is intended.
    """
    pair_count = num_topics*(num_topics - 1)/2
    square = squareform(pdist(topic_term_dists, metric='cosine'))
    return np.sum(square) / pair_count
lda_tuna.py 文件源码 项目:twitter_LDA_topic_modeling 作者: kenneth-orton 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def deveaud_2014(topic_term_dists, num_topics):
    """Sum of pairwise ``jensen_shannon`` values between topics, per ordered pair.

    The sum runs over the full square matrix built from the pairwise values
    and is divided by ``num_topics * (num_topics - 1)``.
    """
    ordered_pairs = num_topics*(num_topics - 1)
    square = squareform(pdist(topic_term_dists, metric=jensen_shannon))
    return np.sum(square) / ordered_pairs
utils.py 文件源码 项目:simec 作者: cod3licious 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def check_embed_match(X_embed1, X_embed2):
    """
    Measure how closely two embeddings agree: the pairwise euclidean distances
    of each embedding are scaled by their maximum and the two distance vectors
    are then correlated.
    Inputs:
        - X_embed1, X_embed2: two Nxd matrices with coordinates in the embedding space
    Returns:
        - r: Pearson correlation coefficient between the normalized distances of the points
    """
    normalized = []
    for coords in (X_embed1, X_embed2):
        pairwise = pdist(coords, 'euclidean')
        normalized.append(pairwise / pairwise.max())
    first, second = normalized
    return np.corrcoef(first, second)[0, 1]
shared.py 文件源码 项目:adversarial-variational-bayes 作者: gdikov 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def median_heuristic(y):
    """Estimate the RBF bandwidth with the median heuristic.

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.

    Returns
    -------
    bandwidth : float
                Median pairwise Euclidean distance between the (possibly
                subsampled) rows of y, divided by sqrt(2).

    """
    # At most this many samples are used; larger inputs are subsampled.
    max_samples = 100
    total = y.shape[0]

    if total > max_samples:
        # Draw max_samples distinct row indices at random.
        keep = choice(total, max_samples, replace=False)
        y = y[keep]

    pairwise = pdist(y)  # pairwise Euclidean distances
    return median(pairwise) / sqrt(2)


问题


面经


文章

微信
公众号

扫码关注公众号