def filter_bam_maxtags(obam_fn, ibam_fn, max_tags=1):
"""DOCSTRING
Args
Returns
"""
assert max_tags>0
# prepare files
ibam = pysam.Samfile(ibam_fn, 'rb')
obam = pysam.Samfile(obam_fn, 'wb', template=ibam)
# init
collapse_dict = defaultdict(list)
chr_list=[x['SN'] for x in ibam.header['SQ']]
input_counter = 0
output_counter = 0
for chr in chr_list:
# empty stack for each new chromosome
stack = []
last_pos = -1
for read in ibam.fetch(chr):
input_counter += 1
if not (input_counter % (5*(10**6)) ):
logger.debug('collapsed %i alignments'%input_counter)
if read.positions[0] > last_pos:
new_alignment_list, collapse_dict = collapse_stack(stack, collapse_dict, max_tags)
output_counter += len(new_alignment_list)
last_pos = read.positions[0]
stack = [read]
for new_alignment in new_alignment_list:
new_alignment.query_sequence = '*'
new_alignment.query_qualities = '0'
_ = obam.write(new_alignment)
else:
stack.append(read)
new_alignment_list, collapse_dict = collapse_stack(stack, collapse_dict, max_tags)
output_counter += len(new_alignment_list)
last_pos = read.positions[0]
for new_alignment in new_alignment_list:
new_alignment.query_sequence = '*'
new_alignment.query_qualities = '0'
_ = obam.write(new_alignment)
ibam.close()
obam.close()
#os.rename(obam_fn, ibam_fn)
#pysam.sort(obam_fn)
pysam.index(obam_fn)
logger.info('Input = %s; Output = %s; Redundancy = %.2f'%(input_counter,output_counter, 1-float(output_counter)/input_counter))
return
评论列表
文章目录