def split(args):
    """Build one chunk definition per barcode-chunked BAM, with a memory request.

    For each (reads-per-barcode file, BAM, gem group) triple, the largest
    subsampled per-barcode read count is found, capped at MAX_READS_PER_BC,
    and converted into a memory request in GB (never below 2).

    Args:
        args: object exposing
            reads_per_bc: paths of text files; every non-blank line holds two
                whitespace-separated fields, the second being a read count.
            barcode_chunked_bams: BAM entries, parallel to reads_per_bc.
            chunk_gem_groups: gem groups, parallel to reads_per_bc.
            subsample_rate: container indexed by str(gem_group) giving the
                factor applied to each read count.

    Returns:
        dict with 'chunks' (a list of chunk dicts, or a single placeholder
        chunk with 'chunked_bam': None when there are no inputs) and 'join'
        (requesting 4 threads).

    Raises:
        ValueError: if a non-blank line does not have exactly two fields or
            its second field is not numeric.
    """
    chunks = []

    # Built-in zip stops at the shortest input just like itertools.izip, but
    # is available on both Python 2 and Python 3.
    for reads_per_bc_file, bam, gem_group in zip(args.reads_per_bc,
                                                 args.barcode_chunked_bams,
                                                 args.chunk_gem_groups):
        subsample_rate = args.subsample_rate[str(gem_group)]

        # Keep a running maximum rather than materializing every count.
        # Seeding with 0.0 covers a file that contains no barcodes.
        max_reads = 0.0
        with open(reads_per_bc_file) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                _, reads = fields
                max_reads = max(max_reads, float(reads) * subsample_rate)

        # vdj_asm is hard-coded to use a maximum of 200k reads / BC.
        max_reads = min(MAX_READS_PER_BC, max_reads)

        # The assembly step takes roughly num_reads * MEM_BYTES_PER_READ bytes
        # of memory to complete each BC.
        mem_gb = max(2.0, int(np.ceil(MEM_BYTES_PER_READ * max_reads / 1e9)))

        chunks.append({
            'chunked_bam': bam,
            'gem_group': gem_group,
            '__mem_gb': mem_gb,
        })

    # If there were no input reads, create a dummy chunk
    if not chunks:
        chunks.append({'chunked_bam': None})

    return {'chunks': chunks, 'join': {'__threads': 4}}
# Comment list
# Table of contents