def get_molecule_iter(self, barcode_length, subsample_rate=1.0):
    """ Return an iterator on Molecule tuples.

    Walks the 'barcode', 'gem_group', 'genome', 'gene' and 'reads' columns
    in lockstep and yields one Molecule per row that has a non-zero read
    count.

    Args:
        barcode_length: passed through to decompress_barcode_seq as its
            barcode_length keyword.
        subsample_rate: value in [0, 1]. When below 1.0, the read counts are
            replaced by draws from np.random.binomial(reads, subsample_rate)
            before iterating, so rows that end up with 0 reads are dropped.

    Yields:
        Molecule(barcode, genome, gene_id, reads) where barcode is the
        formatted barcode string for the row, and genome / gene_id are looked
        up in the 'genome_ids' / 'gene_ids' reference columns by the row's
        integer indices.
    """
    assert 0 <= subsample_rate <= 1.0

    # Load the molecule data
    mol_barcodes = self.get_column('barcode')
    mol_gem_groups = self.get_column('gem_group')
    mol_genome_ints = self.get_column('genome')
    mol_gene_ints = self.get_column('gene')
    mol_reads = self.get_column('reads')

    gene_ids = self.get_ref_column('gene_ids')
    genome_ids = self.get_ref_column('genome_ids')

    if subsample_rate < 1.0:
        mol_reads = np.random.binomial(mol_reads, subsample_rate)

    # Remember the last (compressed barcode, gem group) pair that was
    # decompressed, together with its formatted result, so runs of rows that
    # share a barcode do not repeat the decompression work.
    prev_compressed_bc = None
    prev_gem_group = None
    prev_bc = None

    for compressed_bc, gem_group, genome_int, gene_int, reads in itertools.izip(mol_barcodes,
                                                                                mol_gem_groups,
                                                                                mol_genome_ints,
                                                                                mol_gene_ints,
                                                                                mol_reads):
        if reads == 0:
            continue

        # Decompress the cell barcode if necessary
        if compressed_bc == prev_compressed_bc and gem_group == prev_gem_group:
            bc = prev_bc
        else:
            bc = cr_utils.format_barcode_seq(self.decompress_barcode_seq(compressed_bc, barcode_length=barcode_length),
                                             gem_group)
            # Refresh the memo; without this the fast path above is never taken.
            prev_compressed_bc = compressed_bc
            prev_gem_group = gem_group
            prev_bc = bc

        yield Molecule(barcode=bc,
                       genome=genome_ids[genome_int],
                       gene_id=gene_ids[gene_int],
                       reads=reads)
评论列表
文章目录