__main__.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:CNValloc 作者: m1m0r1 项目源码 文件源码
def bam2hist(args):
    samfile = pysam.Samfile(args.bam)

    regions = samfile
    (rname, start, end) = parse_region(args.region)

    refseq = None
    if args.genome:
        fasta = pysam.Fastafile(args.genome)
        refseq = fasta.fetch(reference=rname, start=start, end=end)

    if args.with_header:
        print ('rname', 'pos', 'offset',
               'ref',
               'nread', 'major_base', 'major_count', 'minor_count',
               'major_freq', 'minor_freq',
               'N', 'A', 'T', 'C' ,'G', 'D', 'ndel', 'nins',
              sep='\t')

    if args.use_multimaps:
        skip_flag = BAM_FLAGS.unmapped
    else:
        skip_flag = SKIP_FLAG

    for p in samfile.pileup(reference=rname, start=start, end=end, mask=skip_flag):
        if (p.pos < start) or (end is not None and end <= p.pos):  # skip outside of region
            continue

        h = ReadHist()
        for read in p.pileups:
            if read.query_position is None:
                continue
            info = ReadInfo(read, skip_flag=skip_flag)
            if info.skip_flag:
                continue
            h.add_read(info)
        major_base = h.major_base()
        major_count = getattr(h, major_base)
        minor_count = h.total_count() - major_count
        major_freq = h.major_freq()
        if major_freq > args.max_major_freq:
            continue
        print (rname, p.pos + 1, p.pos - start,
               '.' if refseq is None else refseq[p.pos - start],
               p.n, major_base, major_count, minor_count,
               '{0:.3f}'.format(major_freq), '{0:.3f}'.format(1 - major_freq),
               h.N, h.A, h.T, h.C, h.G, h.D, h.ndel, h.nins,
               sep='\t')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号