def filter_bam(input_file, output_file, downweight_number, ctot, gtoa):
"""
Takes a bam file as input and weights the quality of the reads down.
Need to ensure we write the header out :)
Investigate pysam and look for a header,
this should really help us understand how to get this bam filter working
and writing the bam files directly back out to the terminal.
"""
bam = pysam.Samfile(input_file,'rb')
bam_out = pysam.Samfile(output_file, 'wb',template=bam)
for line in bam:
change_bases_c = None
change_bases_t = None
seq = line.seq
qual = line.qual
if(ctot):
change_bases_c = [check_c_2_t(nuc) and i < downweight_number for i, nuc in enumerate(seq)]
if(gtoa):
change_bases_t = [check_g_2_a(nuc) and (len(seq)-i) <= downweight_number for i, nuc in enumerate(seq)]
new_qual = downweight_quality(qual,change_bases_c, change_bases_t)
line.qual = new_qual
bam_out.write(line)
评论列表
文章目录