def get_bc_counts(genomes, genes, molecule_counter):
genome_ids = molecule_counter.get_column('genome')
genome_index = cr_reference.get_genome_index(genomes)
conf_mapped_reads = molecule_counter.get_column('reads')
barcodes = molecule_counter.get_column('barcode')
bc_counts = {}
for genome in genomes:
genome_id = cr_reference.get_genome_id(genome, genome_index)
genome_indices = genome_ids == genome_id
if genome_indices.sum() == 0:
# edge case - there's no data for this genome (e.g. empty sample, false barnyard sample, or nothing confidently mapped)
continue
bcs_for_genome = barcodes[genome_indices]
# only count UMIs with at least one conf mapped read
umi_conf_mapped_to_genome = conf_mapped_reads[genome_indices] > 0
bc_breaks = bcs_for_genome[1:] - bcs_for_genome[:-1]
bc_breaks = np.concatenate(([1], bc_breaks)) # first row is always a break
bc_break_indices = np.nonzero(bc_breaks)[0]
unique_bcs = bcs_for_genome[bc_break_indices]
umis_per_bc = np.add.reduceat(umi_conf_mapped_to_genome, bc_break_indices)
cmb_reads_per_bc = np.add.reduceat(conf_mapped_reads[genome_indices], bc_break_indices)
bc_counts[genome] = (unique_bcs, umis_per_bc, cmb_reads_per_bc)
return bc_counts
评论列表
文章目录