pacbio.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:sequana 作者: sequana 项目源码 文件源码
def random_selection(self, output_filename, nreads=None,
            expected_coverage=None, reference_length=None):
        """Select random reads

        :param nreads: number of reads to select randomly. Must be less than
            number of available reads in the orignal file.
        :param expected_coverage:
        :param reference_length:

        of expected_coverage and reference_length provided, nreads is replaced
        automatically.
        """
        assert output_filename != self.filename, \
            "output filename should be different from the input filename"
        self.reset()

        if expected_coverage and reference_length:
            mu = self.stats['mean']
            nreads = int(expected_coverage * reference_length / mu)

        assert nreads < len(self), "nreads parameter larger than actual Number of reads"
        selector = random.sample(range(len(self)), nreads)
        logger.info("Creating a pacbio BAM file with {} reads".format(nreads))

        with pysam.AlignmentFile(output_filename,"wb", template=self.data) as fh:
            for i, read in enumerate(self.data):
                if i in selector:
                    fh.write(read)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号