def random_selection(self, output_filename, nreads=None,
expected_coverage=None, reference_length=None):
"""Select random reads
:param nreads: number of reads to select randomly. Must be less than
number of available reads in the orignal file.
:param expected_coverage:
:param reference_length:
of expected_coverage and reference_length provided, nreads is replaced
automatically.
"""
assert output_filename != self.filename, \
"output filename should be different from the input filename"
self.reset()
if expected_coverage and reference_length:
mu = self.stats['mean']
nreads = int(expected_coverage * reference_length / mu)
assert nreads < len(self), "nreads parameter larger than actual Number of reads"
selector = random.sample(range(len(self)), nreads)
logger.info("Creating a pacbio BAM file with {} reads".format(nreads))
with pysam.AlignmentFile(output_filename,"wb", template=self.data) as fh:
for i, read in enumerate(self.data):
if i in selector:
fh.write(read)
评论列表
文章目录