def get_file_dic(path, options):
'''
This function opens supplied VCF files and saves metadata and variants in the dictionary spec_data.
'''
global sample_list
spec_data = [] # create empty list that will be populated which dictionaries of VCF file information
file_list = [f for f in os.listdir(path) if
options.file_pattern in f] # get file list from input path and fname pattern
for fname in file_list:
if not options.inverse_deletions: # retroseq results
# DO VCF to BED conversion
if fname.endswith(".vcf"):
subprocess32.call(["vcf2bed", "-d"], stdin=open(os.path.join(path, fname)), stdout=open(os.path.join(options.out_path, fname.replace(".vcf", ".bed")),"w"))
fname = fname.replace(".vcf", ".bed")
else:
copyfile(os.path.join(path, fname), os.path.join(options.out_path,fname))
fname_splitted = fname.split(".")
species_dict = {"sample": fname_splitted[0],
"fname": fname,
"ftype": fname_splitted[-1],
"meitype": fname_splitted[3],
"f_bedtools": BedTool(os.path.join(options.out_path, fname)),
"fname_sm": fname.replace(".gq.bed", ".sorted.merged.gq.bed")}
elif options.inverse_deletions:
fname_splitted = fname.split(".")
species_dict = {"sample": fname_splitted[0],
"fname": fname,
"ftype": fname_splitted[-1],
"meitype": fname_splitted[-2], #fname_splitted[2],
"f_bedtools": BedTool(os.path.join(path, fname)).saveas(os.path.join(options.out_path,fname)),
"fname_sm": fname.replace(".bed", ".sorted.merged.bed")}
print "\n loading %s" % fname,
print "\t BedTool object length: %i" % (len(species_dict.get("f_bedtools"))),
if len(species_dict.get("f_bedtools")) < 3 or species_dict.get(
"meitype") == "DNA": # filter out empty BedTool object and DNA insetions
continue
print "\t performing analyses: ",
for analyses in prep_analyses: # perform initial analyses
print "\t %s" % analyses.__name__,
species_dict["f_bedtools"] = analyses(species_dict.get("f_bedtools")).saveas(os.path.join(options.out_path, species_dict.get("fname_sm"))) #.saveas(os.path.join(options.out_path, species_dict.get("fname_sm"))) # save again to dictionary
# species_dict.get("f_bedtools").saveas(
# os.path.join(options.out_path, species_dict.get("fname_sm"))) # save to file
spec_data.append(species_dict) # append to list
sample_list = set([l.get("sample") for l in spec_data])
return spec_data
评论列表
文章目录