# tpi_core.py -- source file (Python code snippet)

def get_file_dic(path, options):
    '''
    This function opens supplied VCF files and saves metadata and variants in the dictionary spec_data.
    '''
    global sample_list

    spec_data = []  # create empty list that will be populated which dictionaries of VCF file information
    file_list = [f for f in os.listdir(path) if
                 options.file_pattern in f]  # get file list from input path and fname pattern

    for fname in file_list:

        if not options.inverse_deletions:  # retroseq results

            # DO VCF to BED conversion
            if fname.endswith(".vcf"):
                subprocess32.call(["vcf2bed", "-d"], stdin=open(os.path.join(path, fname)), stdout=open(os.path.join(options.out_path, fname.replace(".vcf", ".bed")),"w"))
                fname = fname.replace(".vcf", ".bed")
            else:
                copyfile(os.path.join(path, fname), os.path.join(options.out_path,fname))

            fname_splitted = fname.split(".")

            species_dict = {"sample": fname_splitted[0],
                            "fname": fname,
                            "ftype": fname_splitted[-1],
                            "meitype": fname_splitted[3],
                            "f_bedtools": BedTool(os.path.join(options.out_path, fname)),
                            "fname_sm": fname.replace(".gq.bed", ".sorted.merged.gq.bed")}

        elif options.inverse_deletions:

            fname_splitted = fname.split(".")
            species_dict = {"sample": fname_splitted[0],
                            "fname": fname,
                            "ftype": fname_splitted[-1],
                            "meitype": fname_splitted[-2],  #fname_splitted[2],
                            "f_bedtools": BedTool(os.path.join(path, fname)).saveas(os.path.join(options.out_path,fname)),
                            "fname_sm": fname.replace(".bed", ".sorted.merged.bed")}

        print "\n loading %s" % fname,
        print "\t BedTool object length: %i" % (len(species_dict.get("f_bedtools"))),

        if len(species_dict.get("f_bedtools")) < 3 or species_dict.get(
                "meitype") == "DNA":  # filter out empty BedTool object and DNA insetions
            continue
        print "\t performing analyses: ",
        for analyses in prep_analyses:  # perform initial analyses
            print "\t %s" % analyses.__name__,
            species_dict["f_bedtools"] = analyses(species_dict.get("f_bedtools")).saveas(os.path.join(options.out_path, species_dict.get("fname_sm"))) #.saveas(os.path.join(options.out_path, species_dict.get("fname_sm"))) # save again to dictionary

        # species_dict.get("f_bedtools").saveas(
        #     os.path.join(options.out_path, species_dict.get("fname_sm")))  # save to file
        spec_data.append(species_dict)  # append to list
    sample_list = set([l.get("sample") for l in spec_data])

    return spec_data