molecule_counter.py 文件源码

python
阅读 36 收藏 0 点赞 0 评论 0

项目:cellranger 作者: 10XGenomics 项目源码 文件源码
def get_molecule_iter(self, barcode_length, subsample_rate=1.0):
        """ Return an iterator on Molecule tuples.

        Reads the 'barcode', 'gem_group', 'genome', 'gene' and 'reads'
        columns plus the 'gene_ids' and 'genome_ids' reference columns, then
        walks the rows in lockstep.

        Args:
            barcode_length: Forwarded to ``self.decompress_barcode_seq`` as
                ``barcode_length`` when a compressed barcode is expanded.
            subsample_rate: Value in [0, 1]. When below 1.0, the per-row read
                counts are replaced by ``np.random.binomial(reads,
                subsample_rate)`` before iteration.

        Yields:
            Molecule(barcode, genome, gene_id, reads) for each row whose
            (possibly subsampled) read count is non-zero. ``genome`` and
            ``gene_id`` are looked up in the reference columns using the
            row's integer indices.

        Raises:
            AssertionError: If ``subsample_rate`` is outside [0, 1]
                (only while assertions are enabled).
        """
        assert 0 <= subsample_rate <= 1.0

        # One-entry cache of the most recently formatted barcode so that
        # consecutive rows with the same (compressed barcode, gem group) do
        # not each pay for decompression and formatting.
        # NOTE(review): this only helps if rows sharing a barcode are
        # adjacent in the molecule data -- confirm against the writer.
        prev_compressed_bc = None
        prev_gem_group = None
        prev_bc = None

        # Load the molecule data
        mol_barcodes = self.get_column('barcode')
        mol_gem_groups = self.get_column('gem_group')
        mol_genome_ints = self.get_column('genome')
        mol_gene_ints = self.get_column('gene')
        mol_reads = self.get_column('reads')

        gene_ids = self.get_ref_column('gene_ids')
        genome_ids = self.get_ref_column('genome_ids')

        if subsample_rate < 1.0:
            # Binomially thin each row's read count.
            mol_reads = np.random.binomial(mol_reads, subsample_rate)

        rows = itertools.izip(mol_barcodes,
                              mol_gem_groups,
                              mol_genome_ints,
                              mol_gene_ints,
                              mol_reads)
        for compressed_bc, gem_group, genome_int, gene_int, reads in rows:
            # Rows with no reads left (e.g. after subsampling) are dropped.
            if reads == 0:
                continue

            # Decompress the cell barcode if necessary
            if compressed_bc == prev_compressed_bc and gem_group == prev_gem_group:
                bc = prev_bc
            else:
                bc = cr_utils.format_barcode_seq(
                    self.decompress_barcode_seq(compressed_bc, barcode_length=barcode_length),
                    gem_group)
                # Remember what was just decoded; without this the cache
                # branch above can never be taken.
                prev_compressed_bc = compressed_bc
                prev_gem_group = gem_group
                prev_bc = bc

            yield Molecule(barcode=bc,
                           genome=genome_ids[genome_int],
                           gene_id=gene_ids[gene_int],
                           reads=reads)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号