def write_umi_info(pickles, filename):
"""" Write an H5 with (bc, chain, read_count) tuples """
filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
with tables.open_file(filename, 'w', filters=filters) as h5:
umi_info = vdj_umi_info.create_arrays(h5)
bc_to_int = {}
chain_to_int = {}
for pickle in pickles:
bc_chain_umi_counts = cPickle.load(open(pickle))
for bc, chain_umis in bc_chain_umi_counts.iteritems():
for chain, umi_counts in chain_umis.iteritems():
n_umis = len(umi_counts)
if chain != cr_constants.MULTI_REFS_PREFIX and n_umis > 0:
if bc not in bc_to_int:
bc_to_int[bc] = len(bc_to_int)
if chain not in chain_to_int:
chain_to_int[chain] = len(chain_to_int)
umi_info['barcode_idx'].append(np.full(n_umis, bc_to_int[bc],
dtype=vdj_umi_info.get_dtype('barcode_idx')))
umi_info['chain_idx'].append(np.full(n_umis, chain_to_int[chain],
dtype=vdj_umi_info.get_dtype('chain_idx')))
umi_info['reads'].append(np.fromiter(umi_counts.itervalues(),
vdj_umi_info.get_dtype('reads'), count=n_umis))
vdj_umi_info.set_ref_column(h5, 'barcodes', np.array(sorted(bc_to_int.keys(), key=bc_to_int.get)))
vdj_umi_info.set_ref_column(h5, 'chains', np.array(sorted(chain_to_int.keys(), key=chain_to_int.get)))
评论列表
文章目录