def removeReadsOverlappingHetRegion(inbamfn, bedfn,outbamfn,path):
print "___ removing reads overlapping heterozygous region ___"
inbamsorted = sub('.bam$','.sorted',inbamfn)
pysam.sort(inbamfn, inbamsorted)
pysam.index(inbamsorted+'.bam')
alignmentfile = pysam.AlignmentFile(inbamsorted+'.bam', "rb" )
outbam = pysam.Samfile(outbamfn, 'wb', template=alignmentfile )
bedfile = open(bedfn, 'r')
for bedline in bedfile:
c = bedline.strip().split()
if (len(c) == 3 ):
chr2 = c[0]
chr = c[0].strip("chr")
start = int(c[1])
end = int(c[2])
else :
continue
try:
readmappings = alignmentfile.fetch(chr2, start, end)
except ValueError as e:
print("problem fetching the read ")
for shortread in readmappings:
try:
outbam.write(shortread)
except ValueError as e:
print ("problem removing read :" + shortread.qname)
outbamsorted = sub('.bam$','.sorted',outbamfn)
pysam.sort(outbamfn, outbamsorted)
bamDiff(inbamsorted+'.bam', outbamsorted +'.bam', path )
outbam.close()
评论列表
文章目录