motif_tools.py 文件源码

python
阅读 33 收藏 0 点赞 0 评论 0

项目:mbin 作者: fanglab 项目源码 文件源码
def shorten_motifs( contig_motifs, highscore_motifs ):
    """
    Keep only the shortest, most concise version of the high scoring
    motifs (reduces redundancy).

    Each key of ``highscore_motifs`` is a string of the form
    ``"<sequence>-<index>"`` (for example ``"GATC-1"``), where the integer
    after the dash is a position inside the sequence. A motif is dropped
    when a strictly shorter motif from ``highscore_motifs`` occurs inside
    its sequence at the offset that makes both indexes point at the same
    position.

    Args:
        contig_motifs: Unused; kept so existing callers keep working.
        highscore_motifs: Mapping whose keys are motif strings.

    Returns:
        A set with the motif keys that survive the redundancy filter.
    """
    keeper_motifs = set(highscore_motifs)
    if not keeper_motifs:
        return keeper_motifs

    # Split every key once up front instead of on each inner-loop pass.
    parsed = {}
    for motif in highscore_motifs:
        fields = motif.split("-")
        parsed[motif] = (fields[0], int(fields[1]))

    # Shortest keys first, so redundant longer motifs are discarded early.
    for motif in sorted(highscore_motifs, key=len):
        motif_str, motif_idx = parsed[motif]
        for remaining in list(keeper_motifs):
            remaining_str, remaining_idx = parsed[remaining]
            if len(remaining_str) <= len(motif_str):
                continue
            # The shorter motif must start exactly here for both indexes
            # to line up. Testing this offset directly (rather than the
            # first regex hit) also catches motifs that repeat inside the
            # longer sequence, and treats the motif text literally.
            offset = remaining_idx - motif_idx
            if offset >= 0 and remaining_str.startswith(motif_str, offset):
                keeper_motifs.remove(remaining)
    return keeper_motifs
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号