def count_reads_in_windows(bed_file, args):
    # type: (str, Namespace) -> List[pd.DataFrame]
    """Count reads in windows per chromosome strand, then merge the strands.

    One `_count_reads_in_windows` job is dispatched for every
    (chromosome, strand) combination. Chromosomes are visited in natural
    sort order and, within each chromosome, "+" comes before "-". The
    per-strand results are then grouped with `_pairwise` and every group is
    handed to `merge_chromosome_dfs`.

    Args:
        bed_file: Forwarded unchanged as the first argument of
            `_count_reads_in_windows`.
        args: Options object. `chromosome_sizes` (a mapping keyed by
            chromosome name whose values are passed to the worker) and
            `number_cores` (used as `n_jobs` for both `Parallel` runs) are
            read here; the whole object is also forwarded to the worker.

    Returns:
        The results of running `merge_chromosome_dfs` over each group
        yielded by `_pairwise`.
    """
    sizes = args.chromosome_sizes
    ordered_chromosomes = natsorted(sizes.keys())
    count_for = partial(_count_reads_in_windows, bed_file, args)

    # Log the chromosome names without their "chr" prefix.
    short_names = (name.replace("chr", "") for name in ordered_chromosomes)
    info("Binning chromosomes {}".format(", ".join(short_names)))

    # Strand is the inner loop, so both strands of a chromosome end up next
    # to each other in the result list.
    strand_jobs = (
        delayed(count_for)(sizes[name], name, strand)
        for name in ordered_chromosomes
        for strand in ("+", "-")
    )
    per_strand_dfs = Parallel(n_jobs=args.number_cores)(strand_jobs)

    info("Merging the bins on both strands per chromosome.")
    # NOTE(review): presumably `_pairwise` yields non-overlapping
    # (plus, minus) pairs for each chromosome -- confirm against its
    # definition.
    strand_pairs = list(_pairwise(per_strand_dfs))
    merge_jobs = (delayed(merge_chromosome_dfs)(pair) for pair in strand_pairs)
    return Parallel(n_jobs=args.number_cores)(merge_jobs)
# Comment list (web-page navigation residue)
# Table of contents (web-page navigation residue)