def main(bam):
# get contig2size
#faidx = FastaIndex(fasta)
#contig2size = {c: faidx.id2stats[c][0] for c in faidx}
sam = pysam.Samfile(bam)
contig2size = {c: s for c, s in zip(sam.references, sam.lengths)}
# estimate on largest contigs
longest_contigs = sorted(contig2size, key=lambda x: contig2size[x], reverse=1)
totdata = [0, 0]
for c in longest_contigs[:25]:
data = bam2matches(bam, regions=[c], mapq=10)
print c, contig2size[c], 1.*data[0] / sum(data), sum(data)
for i in range(2):
totdata[i] += data[i]
data = totdata
print 1.*data[0] / sum(data), sum(data)
评论列表
文章目录