def split(args):
    """Build one chunk definition per read1/read2 FASTQ pair.

    Args:
        args: Object exposing the attributes read here: ``read1s`` and
            ``read2s`` (equal-length sequences, paired by position),
            ``reads_summary`` (path to a JSON file), ``gem_groups``
            (iterable of numeric gem group ids, formatted with ``%d``) and
            ``readpairs_per_chunk`` (target number of read pairs per chunk).

    Returns:
        dict: ``{'chunks': [...]}`` with one entry per FASTQ pair. Each
        entry holds ``read1s_chunk``, ``read2s_chunk`` and
        ``chunks_per_gem_group``; the latter maps ``str(gem_group)`` to its
        bucket count and is the same dict object in every entry.

    Raises:
        AssertionError: If ``read1s`` and ``read2s`` differ in length.
        KeyError: If the summary has no
            ``'<gem_group>_total_reads_per_gem_group'`` entry for a
            requested gem group.
    """
    assert len(args.read1s) == len(args.read2s)

    # Only the parsed summary is needed below, so release the file handle
    # as soon as it has been loaded.
    with open(args.reads_summary) as f:
        reads_summary = json.load(f)

    # Determine the number of buckets required to achieve
    # the given chunk size.
    chunks_per_gem_group = {}
    for gg in args.gem_groups:
        readpairs = reads_summary['%d_total_reads_per_gem_group' % gg]
        # float() forces true division on Python 2 as well.
        n_buckets = int(math.ceil(float(readpairs) / args.readpairs_per_chunk))
        # Never use fewer than 2 buckets per gem group.
        chunks_per_gem_group[str(gg)] = max(2, n_buckets)

    # Built-in zip rather than itertools.izip: izip does not exist on
    # Python 3, while zip pairs the inputs identically on both versions.
    chunks = [
        {
            'read1s_chunk': fastq1,
            'read2s_chunk': fastq2,
            'chunks_per_gem_group': chunks_per_gem_group,
        }
        for fastq1, fastq2 in zip(args.read1s, args.read2s)
    ]
    return {'chunks': chunks}
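# (Stray web-page navigation text, "Comment list" / "Table of contents", was here; it is not part of the program.)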