vcf_merging.py 文件源码

python
阅读 32 收藏 0 点赞 0 评论 0

项目:VAPr 作者: ucsd-ccbb 项目源码 文件源码
def merge_vcfs(input_dir, output_dir, project_name, raw_vcf_path_list=None, vcfs_gzipped=False):
    """Produce a single VCF in ``output_dir`` from one or more input VCFs.

    When more than one input VCF is available, every input is passed through
    ``bgzip_and_index_vcf`` and the resulting paths are handed to
    ``_merge_bgzipped_indexed_vcfs``, which writes
    ``<output_dir>/<project_name><VCF_EXTENSION>``.  When exactly one input
    VCF is available, no merge is performed: the file is copied into
    ``output_dir`` under its original file name (``project_name`` is not used
    in that case).

    Args:
        input_dir: Directory searched for VCF files; only consulted when
            ``raw_vcf_path_list`` is None.
        output_dir: Directory in which the resulting VCF is placed.
        project_name: Base name of the merged output file (multi-file case).
        raw_vcf_path_list: Explicit list of VCF paths to use.  If None, the
            list is discovered in ``input_dir``.
        vcfs_gzipped: Selects which file extension is searched for in
            ``input_dir`` (``BGZIPPED_VCF_EXTENSION`` if true, otherwise
            ``VCF_EXTENSION``).  Ignored when ``raw_vcf_path_list`` is given.

    Returns:
        Path of the single resulting VCF file.

    Raises:
        ValueError: If no VCF files are found in ``input_dir``, or if the
            explicitly supplied ``raw_vcf_path_list`` is empty.
    """

    if raw_vcf_path_list is None:
        vcf_file_extension = BGZIPPED_VCF_EXTENSION if vcfs_gzipped else VCF_EXTENSION
        raw_vcf_path_list = _get_vcf_file_paths_list_in_directory(input_dir, vcf_file_extension)
        if not raw_vcf_path_list:
            raise ValueError("No VCFs found with extension '{0}'.".format(vcf_file_extension))
    elif not raw_vcf_path_list:
        raise ValueError("Input list of VCF files is empty.")

    if len(raw_vcf_path_list) > 1:
        # De-duplicate the bgzipped paths while keeping the caller's order.
        # A plain set() would drop duplicates too, but its iteration order for
        # strings varies between interpreter runs (hash randomization), so the
        # order in which files reach the merge step would not be reproducible.
        bgzipped_vcf_path_list = []
        seen_paths = set()
        for vcf_fp in raw_vcf_path_list:
            bgzipped_fp = bgzip_and_index_vcf(vcf_fp)
            if bgzipped_fp not in seen_paths:
                seen_paths.add(bgzipped_fp)
                bgzipped_vcf_path_list.append(bgzipped_fp)

        single_vcf_path = os.path.join(output_dir, project_name + VCF_EXTENSION)
        _merge_bgzipped_indexed_vcfs(bgzipped_vcf_path_list, single_vcf_path)
    else:
        file_name = os.path.basename(raw_vcf_path_list[0])  # w/o path
        single_vcf_path = os.path.join(output_dir, file_name)
        try:
            # copy to output dir with same file name
            shutil.copyfile(raw_vcf_path_list[0], single_vcf_path)
        except shutil.SameFileError:
            # With a single input file AND identical input/output dirs, the
            # file is already where it needs to be.  copyfile refuses to copy
            # a file onto itself, which is harmless here, so ignore it.
            pass

    return single_vcf_path
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号