def bam2hist(args):
samfile = pysam.Samfile(args.bam)
regions = samfile
(rname, start, end) = parse_region(args.region)
refseq = None
if args.genome:
fasta = pysam.Fastafile(args.genome)
refseq = fasta.fetch(reference=rname, start=start, end=end)
if args.with_header:
print ('rname', 'pos', 'offset',
'ref',
'nread', 'major_base', 'major_count', 'minor_count',
'major_freq', 'minor_freq',
'N', 'A', 'T', 'C' ,'G', 'D', 'ndel', 'nins',
sep='\t')
if args.use_multimaps:
skip_flag = BAM_FLAGS.unmapped
else:
skip_flag = SKIP_FLAG
for p in samfile.pileup(reference=rname, start=start, end=end, mask=skip_flag):
if (p.pos < start) or (end is not None and end <= p.pos): # skip outside of region
continue
h = ReadHist()
for read in p.pileups:
if read.query_position is None:
continue
info = ReadInfo(read, skip_flag=skip_flag)
if info.skip_flag:
continue
h.add_read(info)
major_base = h.major_base()
major_count = getattr(h, major_base)
minor_count = h.total_count() - major_count
major_freq = h.major_freq()
if major_freq > args.max_major_freq:
continue
print (rname, p.pos + 1, p.pos - start,
'.' if refseq is None else refseq[p.pos - start],
p.n, major_base, major_count, minor_count,
'{0:.3f}'.format(major_freq), '{0:.3f}'.format(1 - major_freq),
h.N, h.A, h.T, h.C, h.G, h.D, h.ndel, h.nins,
sep='\t')
评论列表
文章目录