test_clustering.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:HiCembler 作者: lpryszcz 项目源码 文件源码
def get_subtrees0(d, bin_chr, bin_position, method="ward", nchrom=1000, distfrac=0.4):
    """Recomputing linkage is slower than pruning the tree for large matrices"""
    maxtdist = 0
    i = 0
    subtrees = []
    for i in range(1, nchrom):
        Z = fastcluster.linkage(d[np.triu_indices(d.shape[0], 1)], method=method)
        names = get_names(bin_chr, bin_position)
        t = distance_matrix2tree(Z, names)
        #tree = sch.to_tree(Z, False); t = ete3.Tree(getNewick(tree, "", tree.dist, names))

        # get longest branch
        n, bestdist = get_longest(t)
        tname, tdist = t.get_farthest_leaf()#[1]
        if maxtdist < tdist:
            maxtdist = t.get_farthest_leaf()[1]
        # break if small subtree
        if tdist / maxtdist < 1.1 * bestdist / tdist or tdist < maxtdist*distfrac:
            break

        pruned = n.get_leaf_names()         
        subtrees.append(pruned)

        c = Counter(get_chr_name(n) for n in pruned)
        print i, len(names), tdist, maxtdist, bestdist, len(pruned), c.most_common(5)        
        t = truncate(t, maxd=5); t.render('tree_%s.pdf'%i)
        # prune array        
        indices = np.array([False if name in set(pruned) else True for _i, name in enumerate(names)])
        d = d[indices, :]
        d = d[:, indices]
        bin_chr = bin_chr[indices]
        bin_position = bin_position[indices, :]

    if i:    
        subtrees.append(t.get_leaf_names())
        pruned = t.get_leaf_names()
        c = Counter(get_chr_name(n) for n in pruned)
        print i, len(names), tdist, maxtdist, bestdist, len(pruned), c.most_common(5)        
    return subtrees
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号