def iterative_consensus(sequences, program='muscle-medium', threshold=0.6,
        subsample_size=200, maximum_subsample_size=1600):
    """
    Build a consensus of *sequences* from the smallest subsample that works.

    Each round draws a subsample with ``downsampled``, aligns it with
    ``multialign`` (using *program*) and derives a consensus with
    ``consensus`` (using *threshold*). Leading and trailing 'N' characters
    are stripped from that consensus. The round's result is returned as
    soon as one of the following holds:

    - the stripped consensus contains no 'N',
    - the current subsample size is already at least ``len(sequences)``,
      so a bigger subsample could not add any sequences,
    - doubling the subsample size would exceed *maximum_subsample_size*.

    Otherwise the subsample size is doubled and the procedure repeats.
    At least one round is always run, even if *subsample_size* is larger
    than *maximum_subsample_size*.

    Returns the most recently computed consensus with leading/trailing
    'N' characters removed; it may still contain internal 'N' characters
    if the search stopped for one of the last two reasons.
    """
    # Work on a copy of the starting size instead of rebinding the parameter.
    current_size = subsample_size
    while True:
        subset = downsampled(sequences, current_size)
        indexed = OrderedDict(enumerate(subset))
        alignment = multialign(indexed, program=program)
        result = consensus(alignment, threshold=threshold).strip('N')

        # Done when the consensus is fully resolved, or when there are no
        # further sequences left to add to the subsample.
        if 'N' not in result or len(sequences) <= current_size:
            return result

        current_size *= 2
        if current_size > maximum_subsample_size:
            # The next round would go beyond the cap; keep what we have.
            return result
评论列表
文章目录