def merge_vcfs(input_dir, output_dir, project_name, raw_vcf_path_list=None, vcfs_gzipped=False):
    """Combine the input VCF files into one VCF in ``output_dir`` and return its path.

    If more than one input file is available, every file is passed through
    ``bgzip_and_index_vcf`` and the resulting paths are handed, together with
    the output path, to ``_merge_bgzipped_indexed_vcfs``. If exactly one input
    file is available, no merge is run: the file is copied into ``output_dir``
    under its original file name.

    NOTE(review): whether the merged output itself ends up bgzipped and
    indexed depends on ``_merge_bgzipped_indexed_vcfs``, which is not visible
    from this function -- confirm against that helper.

    Args:
        input_dir: Directory searched for VCF files. Only consulted when
            ``raw_vcf_path_list`` is None.
        output_dir: Directory in which the resulting VCF is placed.
        project_name: Base name of the merged file (``VCF_EXTENSION`` is
            appended). Only used when more than one input file is merged.
        raw_vcf_path_list: Optional explicit sequence of VCF file paths. When
            given, ``input_dir`` and ``vcfs_gzipped`` are ignored.
        vcfs_gzipped: When searching ``input_dir``, look for files with
            ``BGZIPPED_VCF_EXTENSION`` instead of ``VCF_EXTENSION``.

    Returns:
        Path of the merged VCF (multiple inputs) or of the copied single input
        file (one input).

    Raises:
        ValueError: If ``raw_vcf_path_list`` is given but empty, or if it is
            None and no files with the expected extension are found in
            ``input_dir``.
    """
    if raw_vcf_path_list is None:
        vcf_file_extension = BGZIPPED_VCF_EXTENSION if vcfs_gzipped else VCF_EXTENSION
        raw_vcf_path_list = _get_vcf_file_paths_list_in_directory(input_dir, vcf_file_extension)
        if len(raw_vcf_path_list) == 0:
            raise ValueError("No VCFs found with extension '{0}'.".format(vcf_file_extension))
    elif len(raw_vcf_path_list) == 0:
        raise ValueError("Input list of VCF files is empty.")

    if len(raw_vcf_path_list) > 1:
        # A set collapses inputs that resolve to the same bgzipped path so each
        # file is handed to the merge only once.
        # NOTE(review): a set is unordered, so the order in which files reach the
        # merge helper is unspecified -- confirm the helper does not depend on it.
        bgzipped_vcf_path_list = {bgzip_and_index_vcf(vcf_fp) for vcf_fp in raw_vcf_path_list}
        single_vcf_path = os.path.join(output_dir, project_name + VCF_EXTENSION)
        _merge_bgzipped_indexed_vcfs(bgzipped_vcf_path_list, single_vcf_path)
    else:
        only_vcf_path = raw_vcf_path_list[0]
        file_name = os.path.basename(only_vcf_path)  # file name without directory
        single_vcf_path = os.path.join(output_dir, file_name)
        try:
            # Copy (not move) into the output dir, keeping the same file name;
            # the source file is left in place.
            shutil.copyfile(only_vcf_path, single_vcf_path)
        except shutil.SameFileError:
            # With a single input file and identical input/output directories
            # the file is already where it needs to be. copyfile refuses to
            # copy a file onto itself, which is harmless here, so ignore it.
            pass

    return single_vcf_path
评论列表
文章目录