run_diffacto.py 文件源码

python
阅读 37 收藏 0 点赞 0 评论 0

项目:diffacto 作者: statisticalbiotechnology 项目源码 文件源码
def parsimony_grouping(g, peps):
    ''' Group peptides to proteins using the rule of parsimony
    Inputs:
        g:  an undirected graph with peptide <-> protein as edges
        peps: the set of peptide sequences, nodes not listed in the peptide set
              are protein IDs. 
    '''
    not_peps = set(g.nodes()) - set(peps)
    prot_groups = dict()
    for cc in nx.connected_component_subgraphs(g):
        in_group_peptides = set(cc.nodes()) - not_peps
        in_group_proteins = not_peps.intersection(cc.nodes())

        if len(in_group_proteins) == 1:
            prot_groups[in_group_proteins.pop()] = in_group_peptides
        elif len(in_group_proteins) > 1:
            reported = set()
            while len(in_group_proteins - reported) > 0:
                candidate_proteins = sorted(in_group_proteins - reported,
                                            key=lambda p: (
                                                len(set(cc[p].keys()) - reported), p),
                                            reverse=True)
                p = candidate_proteins[0]
                current_peps = set(cc[p].keys())
                plabel = [p]
                for i in range(1, len(candidate_proteins)):
                    _p = candidate_proteins[i]
                    _peps = set(cc[_p].keys())
                    if _peps == current_peps:
                        plabel.append(_p)
                    if len(_peps - current_peps) == 0:
                        reported.add(_p)

                plabel = ';'.join(sorted(plabel))
                if len(current_peps - reported) > 0:
                    prot_groups[plabel] = current_peps
                    reported = reported.union(current_peps)
                reported.add(p)
    return prot_groups
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号