count_reads_in_windows.py 文件源码-python代码片段

count_reads_in_windows.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

def count_reads_in_windows(bed_file, args):
    # type: (str, Namespace) -> List[pd.DataFrame]

    chromosome_size_dict = args.chromosome_sizes
    chromosomes = natsorted(list(chromosome_size_dict.keys()))

    parallel_count_reads = partial(_count_reads_in_windows, bed_file, args)

    info("Binning chromosomes {}".format(", ".join([c.replace("chr", "")
                                                    for c in chromosomes])))

    chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(parallel_count_reads)(chromosome_size_dict[chromosome],
                                      chromosome, strand)
        for chromosome, strand in product(chromosomes, ["+", "-"]))

    info("Merging the bins on both strands per chromosome.")
    both_chromosome_strand_dfs = [df_pair
                                  for df_pair in _pairwise(chromosome_dfs)]
    merged_chromosome_dfs = Parallel(
        n_jobs=args.number_cores)(delayed(merge_chromosome_dfs)(df_pair)
                                  for df_pair in both_chromosome_strand_dfs)

    return merged_chromosome_dfs