def build_from_mol_counter(molecule_counter, subsample_rate=1.0,
                           subsample_result=None):
    """Build a GeneBCMatrices populated from a MoleculeCounter's molecules.

    Args:
        molecule_counter: Object supplying the barcode whitelist name, the
            barcode length, the gem groups, the reference columns
            ('gene_ids', 'genome_ids', 'gene_names') and the molecule
            iterator.
        subsample_rate: Forwarded unchanged to
            ``molecule_counter.get_molecule_iter``.
        subsample_result (dict): Optional output dict. When it is not None,
            the sum of ``mol.reads`` over every iterated molecule is stored
            under the ``'mapped_reads'`` key.

    Returns:
        The GeneBCMatrices that received one ``add`` call per iterated
        molecule.
    """
    # Rebuild every barcode sequence present in the original matrices.
    whitelist = cr_utils.load_barcode_whitelist(
        molecule_counter.get_barcode_whitelist())

    # When the counter reports a falsy barcode length, use the length of
    # the first whitelist entry instead.
    bc_length = molecule_counter.get_barcode_length()
    if not bc_length:
        bc_length = len(whitelist[0])

    barcode_seqs = cr_utils.format_barcode_seqs(
        whitelist, molecule_counter.get_gem_groups())

    # Rebuild the Gene tuples from the molecule info reference columns.
    # Only the id and name are known here; the remaining fields stay None.
    gene_ids = molecule_counter.get_ref_column('gene_ids')
    genome_ids = molecule_counter.get_ref_column('genome_ids')
    gene_names = molecule_counter.get_ref_column('gene_names')
    gene_tuples = [
        cr_constants.Gene(gene_id, gene_name, None, None, None)
        for gene_id, gene_name in itertools.izip(gene_ids, gene_names)
    ]
    genes_by_genome = cr_utils.split_genes_by_genomes(gene_tuples, genome_ids)

    matrices = GeneBCMatrices(genome_ids, genes_by_genome, barcode_seqs)

    # Feed every molecule into the matrices while tallying its reads so
    # the total can be reported back to the caller.
    total_reads = 0
    molecules = molecule_counter.get_molecule_iter(
        bc_length, subsample_rate=subsample_rate)
    for molecule in molecules:
        matrices.add(molecule.genome, molecule.gene_id, molecule.barcode)
        total_reads += molecule.reads

    if subsample_result is None:
        return matrices

    subsample_result['mapped_reads'] = total_reads
    return matrices
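# Comment list (web-page navigation residue, not part of the code)
# Table of contents (web-page navigation residue, not part of the code)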