def split(args):
    """Plan the memory requests for each chunk and for the join step.

    Reads ``args.barcode_whitelist``, ``args.gem_groups``,
    ``args.extract_reads_summary`` and ``args.inputs``.

    Returns a dict with two keys:
      'chunks': a list with one dict per element of ``args.inputs``; each
                dict holds that element under 'chunk_input' and the
                per-chunk memory request under '__mem_gb'.
      'join':   a dict holding the join memory request under '__mem_gb'.
    """
    # Both requests come from the same cr_utils helper; the second call also
    # passes the gem groups and use_min=False.
    per_chunk_gb = cr_utils.get_mem_gb_request_from_barcode_whitelist(args.barcode_whitelist)
    barcode_gb = cr_utils.get_mem_gb_request_from_barcode_whitelist(
        args.barcode_whitelist, args.gem_groups, use_min=False)

    # Worst-case estimate of the number of rows in the final molecule info:
    # use the 'total_reads' metric as the row count.
    row_count = cr_utils.get_metric_from_json(args.extract_reads_summary, 'total_reads')

    # Memory for sorting in MoleculeCounter.concatenate_sort, with N rows:
    #   8*N bytes to store the sort indices
    #   (8+8+8)*N bytes to load, concatenate, and index into a 64-bit data column
    # which adds up to 32 bytes per row.
    sort_bytes = 32 * row_count
    sort_gb = int(math.ceil(sort_bytes / 1e9))

    # Raise the request to at least cr_constants.MIN_MEM_GB, then cap it at
    # MAX_MEM_GB.
    floored_gb = max(cr_constants.MIN_MEM_GB, barcode_gb + sort_gb)
    join_gb = min(MAX_MEM_GB, floored_gb)

    return {
        'chunks': [
            {'chunk_input': item, '__mem_gb': per_chunk_gb}
            for item in args.inputs
        ],
        'join': {'__mem_gb': join_gb},
    }
评论列表
文章目录