def shorten_motifs( contig_motifs, highscore_motifs ):
    """
    Keep only the shortest, most concise version of the high scoring
    motifs (reduces redundancy).

    Every key of ``highscore_motifs`` is expected to look like
    ``"<motif>-<index>"``; the part before the first "-" is the motif
    string and the part after it is parsed as an integer index
    (presumably the position of the modified base inside the motif --
    confirm against the caller).

    A motif is dropped when some other, strictly shorter motif occurs
    inside it at the offset that lines the two indices up, i.e. when
    ``short_index + offset == long_index``. Every occurrence is taken
    into account, not just the leftmost one, so ``"GAGA-3"`` is
    recognised as a longer version of ``"GA-1"``.

    Args:
        contig_motifs: Not used by this function; kept so existing
            callers do not need to change.
        highscore_motifs: Mapping whose keys are ``"<motif>-<index>"``
            strings. Only the keys are read.

    Returns:
        A set with the keys of ``highscore_motifs`` that are not a longer
        version of another key. Empty input gives an empty set.

    Raises:
        IndexError: If a key contains no "-" separator.
        ValueError: If the text after the "-" is not an integer.
    """
    # Parse each key once. The motif string is compiled as a regular
    # expression because that is how the motif was matched before.
    parsed = {}
    for key in highscore_motifs:
        fields = key.split("-")
        motif_str = fields[0]
        parsed[key] = (motif_str, int(fields[1]), re.compile(motif_str))

    keeper_motifs = set(parsed)
    # Every motif is tried as the "short" one, including motifs that have
    # already been discarded, so the outcome does not depend on the order
    # in which the keys are visited.
    for short_str, short_idx, short_re in parsed.values():
        # Iterate over a snapshot because the set shrinks inside the loop.
        for candidate in list(keeper_motifs):
            cand_str, cand_idx, _ = parsed[candidate]
            if len(cand_str) <= len(short_str):
                continue
            # The only placement of the short motif that aligns both
            # indices starts at this offset inside the candidate.
            offset = cand_idx - short_idx
            if offset < 0:
                continue
            if short_re.match(cand_str, offset) is not None:
                # The candidate is a longer version of the short motif.
                keeper_motifs.remove(candidate)
    return keeper_motifs
评论列表
文章目录