def estimateInsertSizeDistribution(bamfile,
alignments=50000):
'''estimate insert size from first alignments in bam file.
returns mean and stddev of insert sizes.
'''
assert isPaired(bamfile), \
'can only estimate insert size from' \
'paired bam files'
samfile = pysam.AlignmentFile(bamfile)
# only get positive to avoid double counting
inserts = numpy.array(
[read.tlen for read in samfile.head(alignments)
if read.is_proper_pair and read.tlen > 0])
insert_mean, insert_std = numpy.mean(inserts), numpy.std(inserts)
print('Insert mean of %f, with standard deviation of %f inferred' %
(insert_mean, insert_std))
if insert_mean > 10000 or insert_std > 1000 or \
insert_mean < 1 or insert_std < 1:
print('''WARNING: anomalous insert sizes detected. Please
double check or consider setting values manually.''')
return insert_mean, insert_std
评论列表
文章目录