def _write_group_if_fixed(reads, outbam, getrname):
    """Run ``process`` on one group of reads and write the group if it changed.

    ``process`` is defined elsewhere in this module; it is called with the
    list of reads and the reference-name lookup, and its return value is
    treated as a count (presumably the number of reads it fixed -- confirm
    against its definition).

    Returns that count when it is positive, otherwise 0. Nothing is written
    to ``outbam`` unless the count is positive.
    """
    if not reads:
        # Empty input produces no group at all; do not hand an empty list
        # to the helper.
        return 0
    count = process(reads, getrname)
    if count > 0:
        for read in reads:
            outbam.write(read)
        return count
    return 0


def fixmate(infile, outfile):
    """Process reads of ``infile`` group by group and write fixed groups.

    Consecutive reads sharing the same ``qname`` form one group, so the
    input must keep reads of the same name adjacent. Each group is passed
    to ``process``; only groups for which it reports a positive count are
    written to ``outfile``, which reuses the header and reference names of
    the input.

    Args:
        infile: Path of the BAM file to read.
        outfile: Path of the BAM file to create.

    Returns:
        A tuple ``(n_total, n_fixed)``: the number of reads read from
        ``infile`` and the sum of the positive counts returned by
        ``process``.
    """
    n_total = 0
    n_fixed = 0
    # Remember the caller's GC setting so it can be restored on exit; the
    # collector is switched off only for the duration of the read loop.
    gc_was_enabled = gc.isenabled()
    inbam = pysam.Samfile(infile, 'rb')
    outbam = None
    try:
        outbam = pysam.Samfile(outfile, 'wb', header=inbam.header,
                               referencenames=inbam.references)
        gc.disable()
        qname = None
        reads = []
        for rseq in inbam.fetch(until_eof=True):
            n_total += 1
            if qname is None or qname == rseq.qname:
                qname = rseq.qname
                reads.append(rseq)
            else:
                # Read name changed: the previous group is complete.
                n_fixed += _write_group_if_fixed(reads, outbam,
                                                 inbam.getrname)
                qname = rseq.qname
                reads = [rseq]
            if n_total % 200000 == 0:
                logger.info('%d read(s) processed' % n_total)
                # Toggling enable()/disable() does not itself run a
                # collection, so reclaim cyclic garbage explicitly while
                # automatic collection stays off.
                gc.collect()
        # Flush the final group, which the loop never sees the end of.
        n_fixed += _write_group_if_fixed(reads, outbam, inbam.getrname)
        logger.info('%d read(s) processed' % n_total)
        logger.info('%d read(s) fixed and written to file' % n_fixed)
    finally:
        if gc_was_enabled:
            gc.enable()
        inbam.close()
        if outbam is not None:
            outbam.close()
    return (n_total, n_fixed)
评论列表
文章目录