plotting.py 文件源码

python
阅读 38 收藏 0 点赞 0 评论 0

项目:treecat 作者: posterior 项目源码 文件源码
def plot_feature_overlap(df, cmap='binary', method='cluster'):
    """Plot feature-feature presence overlap of a pandas dataframe.

    Entry (i, j) of the plotted matrix is the number of rows in which both
    column i and column j are non-null. Columns are reordered so that
    features with similar presence patterns sit next to each other, and the
    square root of the counts is drawn with ``pyplot.imshow``.

    Args:
        df: A pandas dataframe.
        cmap: A matplotlib colormap.
        method: Method of clustering, one of 'cluster' or 'tree'.

    Raises:
        ValueError: If ``method`` is neither 'cluster' nor 'tree'.
    """
    V = len(df.columns)
    # Presence mask: 1.0 where a cell is non-null, 0.0 where it is missing.
    # ``notnull().values`` replaces ``(df == df).as_matrix()``; ``as_matrix``
    # was removed in pandas 1.0.
    present = df.notnull().values.astype(np.float32)
    # Co-occurrence counts: overlap[i, j] = #rows where both i and j present.
    overlap = np.dot(present.T, present)
    assert overlap.shape == (V, V)

    # Sort features to make blocks contiguous.
    if method == 'tree':
        # TODO(fritzo) Fix this to not look awful.
        grid = make_complete_graph(V)
        weights = np.empty(grid.shape[1], dtype=np.float32)
        # Each column of grid is unpacked as (edge index, vertex, vertex);
        # the edge weight is the overlap count of its two endpoints.
        for k, v1, v2 in grid.T:
            weights[k] = overlap[v1, v2]
        edges = estimate_tree(grid, weights)
        order, order_inv = order_vertices(edges)
    elif method == 'cluster':
        if V < 2:
            # Hierarchical clustering needs at least two observations;
            # with zero or one feature the ordering is trivially identity.
            order_inv = np.arange(V)
        else:
            distance = scipy.spatial.distance.pdist(overlap)
            clustering = scipy.cluster.hierarchy.complete(distance)
            order_inv = scipy.cluster.hierarchy.leaves_list(clustering)
    else:
        raise ValueError(method)
    # Apply the same permutation to rows and columns.
    overlap = overlap[order_inv, :]
    overlap = overlap[:, order_inv]
    assert overlap.shape == (V, V)

    # Square root compresses the dynamic range of the counts for display.
    pyplot.imshow(overlap**0.5, cmap=cmap)
    pyplot.axis('off')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号