python类linkage()的实例源码

test_dynamicTreeCut.py 文件源码 项目:dynamicTreeCut 作者: kylessmith 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_cuttreeHybrid():
    from dynamicTreeCut import cutreeHybrid
    d = np.transpose(np.arange(1, 10001).reshape(100, 100))
    distances = pdist(d, "euclidean")
    link = linkage(distances, "average")
    test = cutreeHybrid(link, distances)

    true = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1]

    assert (test['labels'] == true).all()
    assert False
cloning_old.py 文件源码 项目:icing 作者: slipguru 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def formClusters(dists, link, distance):
    """Form clusters based on hierarchical clustering of input distance matrix
    with linkage type and cutoff distance
    :param dists: numpy matrix of distances
    :param link: linkage type for hierarchical clustering
    :param distance: distance at which to cut into clusters
    :return: list of cluster assignments
    """
    # Make distance matrix square
    dists = squareform(dists)
    # Compute linkage
    links = linkage(dists, link)

    # import matplotlib.pyplot as plt
    # from scipy.cluster import hierarchy
    # plt.figure(figsize=(15,5))
    # p = hierarchy.dendrogram(links)

    # Break into clusters based on cutoff
    clusters = fcluster(links, distance, criterion='distance')
    return clusters
ttclust.py 文件源码 项目:TTClust 作者: tubiana 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def generate_graphs(clusters_list, output, size, linkage, cutoff,distances):
    """
    DESCRIPTION
    Create a linear cluster mapping graph where every frame is printed as a
    colored barplot
    Args:
        clusters_labels (list): list of cluster number per frame
        output (string) output name for graph
    Return:
        colors_list (list) to be used with 2D distance projection graph
    """
    colors_list = plot_barplot(clusters_list, output, size)
    plot_dendro(linkage, output, cutoff, colors_list,clusters_list)
    plot_hist(clusters_list, output,colors_list)
    if (distances.shape[0] < 10000):
        implot(distances,output)
    else:
        printScreenLogfile("Too many frames! The RMSD distance matrix will not be generated")
    return colors_list
snapshot.py 文件源码 项目:eTraGo 作者: openego 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def linkage(df, n_groups):
    # create the distance matrix based on the forbenius norm: |A-B|_F where A is
    # a 24 x N matrix with N the number of timeseries inside the dataframe df
    # TODO: We can save have time as we only need the upper triangle once as the
    # distance matrix is symmetric
    if True:
        Y = np.empty((n_groups, n_groups,))
        Y[:] = np.NAN
        for i in range(len(Y)):
            for j in range(len(Y[i,:])):
                A = df.loc[i+1].values
                B = df.loc[j+1].values
                #print('Computing distance of:{},{}'.format(i,j))
                Y[i,j] = norm(A-B, ord='fro')

    # condensed distance matrix as vector for linkage (upper triangle as a vector)
    y = Y[np.triu_indices(n_groups, 1)]
    # create linkage matrix with wards algorithm an euclidean norm
    Z = hac.linkage(y, method='ward', metric='euclidean')
    # R = hac.inconsistent(Z, d=10)
    return Z
methods.py 文件源码 项目:South-African-Heart-Disease-data-analysis-using-python 作者: khushi4tiwari 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def hierarchicalClustering(X,y,Maxclust, C, Method = 'single', Metric = 'euclidean'):
    # Perform hierarchical/agglomerative clustering on data matrix

    Z = linkage(X, method=Method, metric=Metric)

    # Compute and display clusters by thresholding the dendrogram
    cls = fcluster(Z, criterion='maxclust', t=Maxclust)
    figure()
    #clusterplot(X, cls.reshape(cls.shape[0],1), y=y)
    clusterPlot(X, cls.reshape(cls.shape[0],1), Maxclust, C, y=y)

    # Display dendrogram
    max_display_levels=7
    figure()
    dendrogram(Z, truncate_mode='level', p=max_display_levels, color_threshold=0.5*np.max(Z[:,2]))
    title("Dendrgram of the Hierarchical Clustering")
    show()
array2scaffolds.py 文件源码 项目:HiCembler 作者: lpryszcz 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def array2tree(d, names, outbase="", method="ward"):
    """Return tree representation for array"""
    # cluster
    Z = sch.linkage(d[np.triu_indices(d.shape[0], 1)], method=method)

    # get ete Tree
    t = distance_matrix2tree(Z, names)

    # save tree & newick
    if outbase:
        pdf, nw = outbase+".nw.pdf", outbase+".nw"
        with open(nw, "w") as out:
            out.write(t.write())

        ts = ete3.TreeStyle()
        ts.show_leaf_name = False
        ts.layout_fn = mylayout
        t.render(pdf, tree_style=ts)

    return t
test_hierarchical.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)

    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
test_hierarchical.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_structured_linkage_tree():
    # Check that we obtain the correct solution for structured linkage trees.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    for tree_builder in _TREE_BUILDERS.values():
        children, n_components, n_leaves, parent = \
            tree_builder(X.T, connectivity)
        n_nodes = 2 * X.shape[1] - 1
        assert_true(len(children) + n_leaves == n_nodes)
        # Check that ward_tree raises a ValueError with a connectivity matrix
        # of the wrong shape
        assert_raises(ValueError,
                      tree_builder, X.T, np.ones((4, 4)))
        # Check that fitting with no samples raises an error
        assert_raises(ValueError,
                      tree_builder, X.T[:0], connectivity)
test_hierarchical.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_unstructured_linkage_tree():
    # Check that we obtain the correct solution for unstructured linkage trees.
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)
    for this_X in (X, X[0]):
        # With specified a number of clusters just for the sake of
        # raising a warning and testing the warning code
        with ignore_warnings():
            children, n_nodes, n_leaves, parent = assert_warns(
                UserWarning, ward_tree, this_X.T, n_clusters=10)
        n_nodes = 2 * X.shape[1] - 1
        assert_equal(len(children) + n_leaves, n_nodes)

    for tree_builder in _TREE_BUILDERS.values():
        for this_X in (X, X[0]):
            with ignore_warnings():
                children, n_nodes, n_leaves, parent = assert_warns(
                    UserWarning, tree_builder, this_X.T, n_clusters=10)

            n_nodes = 2 * X.shape[1] - 1
            assert_equal(len(children) + n_leaves, n_nodes)
test_hierarchical.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage in _TREE_BUILDERS.keys():
        for i in range(5):
            X = .1 * rng.normal(size=(n, p))
            X -= 4. * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            children_ = out[:, :2].astype(np.int)
            children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
clustering.py 文件源码 项目:ccCluster 作者: gsantoni 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def tree(self):
        data = self.ccTable
        Matrix=np.zeros((self.Dimension,self.Dimension))

        reducedArray=[]
        for line in data:
                #print line
            if line is not None and len(line) is not 0:
                 Matrix[line[0],line[1]]= line[2]
                 Matrix[line[1],line[0]]= line[2]


        for x in range(0,self.Dimension):
            for y in range(x+1,self.Dimension):
                reducedArray.append(Matrix[x,y])

        Distances = np.array(reducedArray, dtype=(float))
        self.Tree =hierarchy.linkage(Distances, 'complete')

        return self.Tree

#new function, chose the average linkage
clustering.py 文件源码 项目:ccCluster 作者: gsantoni 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def avgTree(self):
        data = self.ccTable
        Matrix=np.zeros((self.Dimension,self.Dimension))

        reducedArray=[]
        for line in data:
                #print line
            if line is not None and len(line) is not 0:
                 Matrix[line[0],line[1]]= line[2]
                 Matrix[line[1],line[0]]= line[2]


        for x in range(0,self.Dimension):
            for y in range(x+1,self.Dimension):
                reducedArray.append(Matrix[x,y])

        Distances = np.array(reducedArray, dtype=(float))
        self.Tree =hierarchy.linkage(Distances, 'average')

        return self.Tree

#Funtion added to plot dendrogram in shell mode only.
#still not funtioninhg
#Uncomment when will be needed
metricsnamecluster.py 文件源码 项目:rca-evaluation 作者: sieve-microservices 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def cluster_words(words, service_name, size):
    stopwords = ["GET", "POST", "total", "http-requests", service_name, "-", "_"]
    cleaned_words = []
    for word in words:
        for stopword in stopwords:
            word = word.replace(stopword, "")
        cleaned_words.append(word)
    def distance(coord):
        i, j = coord
        return 1 - jaro_distance(cleaned_words[i], cleaned_words[j])
    indices = np.triu_indices(len(words), 1)
    distances = np.apply_along_axis(distance, 0, indices)
    return cluster_of_size(linkage(distances), size)
cluster.py 文件源码 项目:IgDiscover 作者: NBISweden 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cluster_sequences(sequences, minsize=5):
    """
    Cluster the given sequences into groups of similar sequences.

    Return a triple that contains a pandas.DataFrame with the edit distances,
    the linkage result, and a list that maps sequence ids to their cluster id.
    If an entry is zero in that list, it means that the sequence is not part of
    a cluster.
    """
    matrix = distances(sequences)
    linkage = hierarchy.linkage(distance.squareform(matrix), method='average')
    # Linkage columns are:
    # 0, 1: merged clusters, 2: distance, 3: number of nodes in cluster
    inner = inner_nodes(hierarchy.to_tree(linkage))
    prev = linkage[:, 2].max()  # highest distance
    clusters = [0] * len(sequences)
    cl = 1
    for n in inner:
        if n.dist > 0 and prev / n.dist < 0.8 \
                and n.left.count >= minsize and n.right.count >= minsize:
            for id in collect_ids(n.left):
                # Do not overwrite previously assigned ids
                if clusters[id] == 0:
                    clusters[id] = cl
            cl += 1
        prev = n.dist
    # At the end of the above loop, we have not processed the rightmost
    # subtree. In our experiments, it never contains true novel sequences,
    # so we omit it.

    return pd.DataFrame(matrix), linkage, clusters
test.py 文件源码 项目:polo 作者: adrianveres 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def get_cell_data(n=50, seed=0):
    np.random.seed(seed)
    cells_data = np.load('./data/cells_data.npy')

    sample_cells = np.random.choice(cells_data.shape[0], n, replace=False)

    D = pdist(cells_data[sample_cells, :], 'euclidean')
    Z = linkage(D, 'ward')

    return cells_data, Z, D
test.py 文件源码 项目:polo 作者: adrianveres 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def get_random_data(n=50, seed=0):

    np.random.seed(seed)
    data = np.random.choice(10000, (n, 1), replace=False)
    D = pdist(data, 'euclidean')
    Z = linkage(D, 'ward')
    return data, Z, D
aesop.py 文件源码 项目:aesop 作者: BioMoDeL 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def plotDend(esd, filename=None):
    """Summary
    Function to display an electrostatic similarity dendrogram from a
    previously run ElecSimilarity class.

    Parameters
    ----------
    esd : ElecSimilarity class
        ElecSimilarity class containing final esd matrix.
    filename : str, optional
        If the resulting plot should be written to disk, specify a filename.
        Otherwise, the image will only be saved.

    Returns
    -------
    None
        Writes image to disk, if desired.
    """
    # plt.style.use('seaborn-talk')
    fig, ax = plt.subplots(sharey=True)
    Z = cluster.linkage(esd.esd)
    cluster.dendrogram(
        Z,
        labels=esd.ids,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=8.,  # font size for the x axis labels
        ax=ax)
    plt.xlabel('Variants')
    plt.ylabel('ESD')
    plt.tight_layout()
    if filename is not None:
        fig.savefig(filename)
utils.py 文件源码 项目:deeppavlov 作者: deepmipt 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def build_clusters(predicted_scores, method='centroid'):
    """agglomerative clustering using predicted scores as distances

    Args:
        predicted_scores: predicted scores for all mentions in documents
        method: methods for calculating distance between clusters
            look at scipy.cluster.hierarchy.linkage documentation

    Returns:
        clustering, min_score and max_score in predicted_scores

    """
    print('building clusters')
    min_score = 1e10
    max_score = 0
    clustrering = []
    for doc_id in tqdm(range(len(predicted_scores))):
        scores = predicted_scores[doc_id]
        if len(scores) > 0:
            distances = []
            for i in range(len(scores)):
                for j in range(i + 1, len(scores)):
                    distances.append((scores[i, j] + scores[j, i]) / 2)
            c = linkage(distances, method=method)
            clustrering.append(c)
            min_score = min(min(c[:, 2]), min_score)
            max_score = max(max(c[:, 2]), max_score)
    print('clusters are built: min_score: {} max_score: {}'.format(min_score, max_score))
    return clustrering, min_score, max_score
distance_based.py 文件源码 项目:mitre 作者: gerberlab 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def tree_from_linkage_matrix(linkage, leaf_labels):
    """ Form an ete3.Tree from hierarchical linkage matrix.

    Linkage should be the matrix returned by hierarchy.linkage. 
    leaf_labels should be a vector of names for the nodes 
    corresponding to the clustered items. Internal nodes will be 
    named node0, node1, etc, in the order in which the 
    clusters they represent were formed. 

    returns: new Tree

    """
distance_based.py 文件源码 项目:mitre 作者: gerberlab 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cluster(target_sequence_ids, fasta_filename, method='average'):
    """ Form distance-based hierachical clustering of sequences.

    Looks up each entry in target_sequence_ids in the file 
    specified by fasta_filename to obtain an associated DNA 
    sequence. 

    In principle, we could just work with the Hamming distance, but 
    the sequences may be of different lengths (mostly small 
    differences.) So we need a more sophisticated approach: we use
    pairwise global alignment, scoring 0 for a match, -1 for mismatch,
    and -1.5 for opening or extending a gap. We then take the distance
    to be -1.0*(score). 

    UPGMA clustering is used when method='average', the default.

    Returns the distance matrix and the linkage matrix returned
    by the clustering routine.

    """
    # globalms arguments: seq1, seq2, match, mismatch, open, extend
    distance = lambda seq1, seq2: -1.0*(
        pairwise2.align.globalms(seq1,seq2,0,-1,-1.5,-1.5, score_only=True)
    )
    sequences = fasta_to_dict(fasta_filename)
    N = len(target_sequence_ids)
    distances = np.zeros((N,N))
    # fill in the upper triangle
    for i,seqid1 in enumerate(target_sequence_ids):
        seq1 = sequences[seqid1]
        for j_offset, seqid2 in enumerate(target_sequence_ids[i+1:]):
            j = j_offset + i + 1
            seq2 = sequences[seqid2]
            distances[i][j] = distance(seq1, seq2)
    # convert to the form expected by the scipy clustering routines
    y = squareform(distances,checks=False)
    return distances, hierarchy.linkage(y,method)
hierarchy.py 文件源码 项目:pysciencedock 作者: Kitware 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def hierarchy(data, axis, method, metric):
    if axis == 'columns':
        data = data.transpose()
    clusters = range(len(data.index), 2*len(data.index) - 1)
    result = pd.DataFrame(
        linkage(data, method=method, metric=metric),
        columns=['child1', 'child2', 'distance', 'size'],
        index=clusters)
    for col in ['child1', 'child2', 'size']:
        result[col] = result[col].astype(int)
    return result
bkheatmap.py 文件源码 项目:bkheatmap 作者: wwliao 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def cluster(df, metric="euclidean", method="single", row=True, column=True):
    row_linkmat, col_linkmat = None, None
    if row:
        distmat = dist.pdist(df, metric)
        row_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(row_linkmat), :]
    if column:
        df = df.T
        distmat = dist.pdist(df, metric)
        col_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(col_linkmat), :].T
    return df, row_linkmat, col_linkmat
metacluster.py 文件源码 项目:word2vec_pipeline 作者: NIHOPA 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def docv_centroid_order_idx(meta_clusters):
    dist = cdist(meta_clusters, meta_clusters, metric='cosine')

    # Compute the linkage and the order
    linkage = hierarchy.linkage(dist, method='average')
    d_idx = hierarchy.dendrogram(linkage, no_plot=True)["leaves"]

    return d_idx
cluster_2d_array_rows.py 文件源码 项目:nd_array 作者: KwatME 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cluster_2d_array_rows(array_2d,
                          linkage_method='average',
                          distance_function='euclidean'):
    """
    Cluster array_2d rows.
    Arguments:
        array_2d (array): (n_rows, n_columns)
        linkage_method (str): linkage method compatible for
            scipy.cluster.hierarchy.linkage
        distance_function (str | callable): distance function compatible for
            scipy.cluster.hierarchy.linkage
    Returns:
        array: (n_rows); clustered row indices
    """

    clustered_indices = dendrogram(
        linkage(array_2d, method=linkage_method, metric=distance_function),
        no_plot=True)['leaves']

    return array(clustered_indices)
clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self):
        """

        https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html:
        A (n-1) by 4 matrix Z is returned. At the i-th iteration, clusters with indices Z[i, 0] and Z[i, 1] are
        combined to form cluster n + i. A cluster with an index less than n corresponds to one of the original
        observations. The distance between clusters Z[i, 0] and Z[i, 1] is given by Z[i, 2]. The fourth value
        Z[i, 3] represents the number of original observations in the newly formed cluster.
        """
        self.linkage = None
        self.series = None
        self._series_y = None
        self.ts_height_factor = None
clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def maxnode(self):
        return len(self.series) - 1 + len(self.linkage)
clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def get_linkage(self, node):
        if node < len(self.series):
            return None
        idx = int(node - len(self.series))
        return self.linkage[idx]
clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def fit(self, series, *args, **kwargs):
        self.series = series
        self.linkage = []
        new_nodes = {i: i for i in range(len(series))}
        if self._model.merge_hook:
            old_merge_hook = self._model.merge_hook
        else:
            old_merge_hook = None

        def merge_hook(from_idx, to_idx, distance):
            # print('merge_hook', from_idx, to_idx)
            new_idx = len(self.series) + len(self.linkage)
            # print('adding to linkage: ', new_nodes[from_idx], new_nodes[to_idx], distance, 0)
            if new_nodes[from_idx] is None:
                raise Exception('Trying to merge series that is already merged')
            self.linkage.append((new_nodes[from_idx], new_nodes[to_idx], distance, 0))
            new_nodes[to_idx] = new_idx
            new_nodes[from_idx] = None
            if old_merge_hook:
                old_merge_hook(from_idx, to_idx, distance)

        self._model.merge_hook = merge_hook

        result = self._model.fit(series, *args, **kwargs)
        self._model.merge_hook = old_merge_hook
        return result
clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, dists_fun, dists_options):
        """Hierarchical clustering using the Scipy linkage function.

        This is the same but faster algorithm as available in Hierarchical (~10 times faster). But with less
        options to steer the clustering (e.g. no possibility to give weights). It still computes the entire
        distance matrix first and is thus not ideal for extremely large data sets.
        """
        super().__init__()
        self.dists_fun = dists_fun
        self.dists_options = dists_options
hierarchical_clustering.py 文件源码 项目:dtaidistance 作者: wannesm 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def example_naivehierarchicalclustering():
    """Naive hierarchical clustering algorithm using DTW and based on .

    For a more efficient approach, check:
    Mueen, A and Keogh, E, Extracting Optimal Performance from Dynamic Time Warping,
    Tutorial, KDD 2016
    http://www.cs.unm.edu/~mueen/DTW.pdf

    :return: None
    """
    series = [
        np.array([0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0]),
        np.array([0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0]),
        np.array([2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0]),
        np.array([2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0]),
        np.array([4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0])
    ]

    dists = dtw.distance_matrix_fast(series)
    print("Distance matrix:\n{}".format(dists))

    dists_cond = np.zeros(size_cond(len(series)))
    idx = 0
    for r in range(len(series)-1):
        dists_cond[idx:idx+len(series)-r-1] = dists[r, r+1:]
        idx += len(series)-r-1

    z = linkage(dists_cond, method='complete', metric='euclidean')
    print(z)

    fig, axes = plt.subplots(2, 1, figsize=(8, 3))
    for idx, serie in enumerate(series):
        serie += idx * 0.1
        axes[0].plot(serie, label=str(idx))
        axes[0].text(0 + 0.15 * (-1)**idx * idx, serie[0] + 0.15 * idx, idx)
        axes[0].add_line(Line2D([0, 0 + 0.15 * (-1)**idx * idx], [serie[0], serie[0] + 0.15 * idx],
                                linewidth=1, color='gray'))
    axes[0].legend(loc=1)
    dendrogram(z, ax=axes[1])
    plt.show(block=True)


问题


面经


文章

微信
公众号

扫码关注公众号