python类tabix_index()的实例源码-面圈网

matrix.py 文件源码项目：pheweb 作者: statgen 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def run(argv):
    """Regenerate the matrix file and its tabix index unless already current.

    When ``should_run()`` is false only a status message is printed.
    Otherwise three UTF-8 encoded paths are handed to
    ``lib.cffi_make_matrix``, the temporary output is renamed to
    ``matrix_gz_filepath`` and that file is tabix-indexed.

    ``argv`` is accepted but not used by this function.
    """
    if not should_run():
        print('matrix is up-to-date!')
        return

    # Build the C string buffers in the order the C function receives them;
    # the list keeps them referenced for the duration of the call.
    c_paths = [
        ffi.new('char[]', path.encode('utf8'))
        for path in (
            sites_filepath,
            common_filepaths['pheno']('*'),
            matrix_gz_tmp_filepath,
        )
    ]
    lib.cffi_make_matrix(*c_paths)

    # Move the finished temporary file to its final name before indexing it.
    os.rename(matrix_gz_tmp_filepath, matrix_gz_filepath)

    # note: these column numbers are 0-based, but `/usr/bin/tabix` is 1-based
    index_columns = dict(seq_col=0, start_col=1, end_col=1)
    pysam.tabix_index(filename=matrix_gz_filepath, force=True, **index_columns)

file_utils.py 文件源码项目：pheweb 作者: statgen 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
    """Write a bgzipped copy of ``vf_path`` to ``ivf_path`` and tabix-index it.

    The compressed data is first written to a temporary path obtained from
    ``get_tmp_path(ivf_path)`` and then renamed onto ``ivf_path``; the index
    is built on the renamed file.

    Args:
        vf_path: path of the input file, passed to ``lib.cffi_bgzip_file``.
        ivf_path: destination path of the compressed, indexed file.
    """
    from .load.cffi._x import ffi, lib

    make_basedir(ivf_path)
    staging_path = get_tmp_path(ivf_path)

    # The third argument is the byte string b'#'.
    lib.cffi_bgzip_file(
        ffi.new('char[]', vf_path.encode('utf8')),
        ffi.new('char[]', staging_path.encode('utf8')),
        ffi.new('char[]', b'#'),
    )
    os.rename(staging_path, ivf_path)

    # note: these column numbers are 0-based, but `/usr/bin/tabix` is 1-based
    index_columns = dict(seq_col=0, start_col=1, end_col=1)
    pysam.tabix_index(filename=ivf_path, force=True, **index_columns)

non_coding_indel.py 文件源码项目：probabilistic2020 作者: KarchinLab 项目源码文件源码阅读 17 收藏 0 点赞 0 评论 0

def main(opts):
    """Split INDEL mutations by whether they overlap a blacklist region.

    Args:
        opts: mapping with the keys
            ``'input'`` - tab-separated INDEL table with ``Chromosome``,
            ``Start_Position`` and ``End_Position`` columns;
            ``'blacklist'`` - file to tabix-index and query;
            ``'output'`` - destination for rows with no blacklist hit;
            ``'blacklist_output'`` - destination for rows with a hit.
    """
    # read in INDEL mutations
    indels = pd.read_csv(opts['input'], sep='\t')

    # Column numbers given to pysam.tabix_index are 0-based (unlike the
    # tabix command line tool); `zerobased` is left at its default.
    pysam.tabix_index(opts['blacklist'], force=True,
                      seq_col=0, start_col=1, end_col=2)

    # query black list to find INDELs with no hits
    non_coding_ixs, coding_ixs = [], []
    black_list = pysam.Tabixfile(opts['blacklist'])
    try:
        for i, row in indels.iterrows():
            # NOTE(review): fetch() treats start/end as 0-based, half-open;
            # confirm the position columns follow that convention.
            result = black_list.fetch(reference=row['Chromosome'],
                                      start=row['Start_Position'],
                                      end=row['End_Position'])
            if not list(result):
                non_coding_ixs.append(i)
            else:
                coding_ixs.append(i)
    finally:
        # Release the index handle even when a fetch raises.
        black_list.close()

    # `.ix` was removed from pandas; the collected values are index labels
    # from iterrows(), so label-based `.loc` selects the same rows.
    # save non-coding indels
    indels.loc[non_coding_ixs, :].to_csv(opts['output'], sep='\t', index=False)
    indels.loc[coding_ixs, :].to_csv(opts['blacklist_output'], sep='\t', index=False)

dbprep.py 文件源码项目：OpEx 作者: RahmanTeam 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def indexFile(options):
    """Bgzip-compress ``options.output`` and tabix-index the ``.gz`` result.

    The columns used for the index and the progress messages depend on
    whether ``options.ensembl`` is set.

    Args:
        options: object with ``output`` (path of the file to compress) and
            ``ensembl`` (``None`` selects the second column layout).
    """
    filename = options.output
    gz_filename = filename + '.gz'
    if options.ensembl is not None:
        sys.stdout.write('Compressing output file... ')
        sys.stdout.flush()
        pysam.tabix_compress(filename, gz_filename, force=True)
        sys.stdout.write('OK\n')
        sys.stdout.write('Indexing output file... ')
        sys.stdout.flush()
        pysam.tabix_index(gz_filename, seq_col=2, start_col=4, end_col=5,
                          meta_char='#', force=True)
        sys.stdout.write('OK\n')
    else:
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid Python 3.
        print('Compressing file...')
        pysam.tabix_compress(filename, gz_filename, force=True)
        print('Indexing file...')
        pysam.tabix_index(gz_filename, seq_col=1, start_col=2, end_col=2,
                          meta_char='#', force=True)

# Sort records in file

tabix.py 文件源码项目：cellranger 作者: 10XGenomics 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def index_vcf(filename):
    """Build a tabix index for ``filename`` using pysam's ``vcf`` preset.

    ``force=True`` is always passed to ``pysam.tabix_index``.
    """
    tabix_options = dict(preset='vcf', force=True)
    pysam.tabix_index(filename, **tabix_options)

testAnnotator.py 文件源码项目：lapels 作者: shunping 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def batchTestHelper(self, modFile, pool, refLens):
    """Annotate the reads described by ``pool`` and check the results.

    The lines of ``modFile`` are copied into a temporary ``.tsv`` file, which
    is tabix-indexed and loaded as a ``mod.Mod`` for chromosome ``'1'``. An
    ``annot.Annotator`` is then run over reads built from ``pool`` and each
    result is compared with the expected values stored in ``pool``.

    Args:
        modFile: iterable of lines with a ``close()`` method; the lines are
            written to a file opened in binary mode.
        pool: sequence of tuples. Items 0-2 are passed to ``Read`` (item 1 is
            incremented by one); items 3-7 are the expected values.
        refLens: mapping indexed by chromosome id.
    """
    # mkstemp returns an already-open descriptor; reuse it for writing so it
    # is closed instead of leaked.
    fd, tsvName = tempfile.mkstemp('.tsv')
    tmpName = tsvName + '.gz'
    try:
        with os.fdopen(fd, 'wb') as tmpfp:
            for line in modFile:
                tmpfp.write(line)
        pysam.tabix_index(tsvName, force=True, seq_col=1, start_col=2, end_col=2,
                          meta_char='#', zerobased=True)
        modFile.close()

        self.chromoID = '1'
        self.modobj = mod.Mod(tmpName)
        self.modobj.load(self.chromoID)

        # Build the read list once; it is also defined when pool is empty.
        bamIter = [Read(tup[0], tup[1]+1, tup[2]) for tup in pool]

        a = annot.Annotator(self.chromoID, refLens[self.chromoID],
                            self.modobj, bamIter)
        results = a.execute()

        for i, res in enumerate(results):
            self.assertEqual(polish(res[0]), pool[i][3])
            self.assertEqual(res[1], pool[i][4])
            self.assertEqual(res[2], pool[i][5])
            self.assertEqual(res[3], pool[i][6])
            self.assertEqual(res[4], pool[i][7])
    finally:
        # Remove whatever temporary files exist, also after a failed assertion.
        for path in (tsvName, tmpName, tmpName + '.tbi'):
            if os.path.exists(path):
                os.remove(path)

testAnnotator.py 文件源码项目：lapels 作者: shunping 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def batchTestHelper(self, modFile, pool, refLens):
    """Annotate the reads described by ``pool`` and check the results.

    The lines of ``modFile`` are copied into a temporary ``.tsv`` file, which
    is tabix-indexed and loaded as a ``mod.Mod`` for chromosome ``'1'``. An
    ``annot.Annotator`` is then run over reads built from ``pool`` and each
    result is compared with the expected values stored in ``pool``.

    Args:
        modFile: iterable of lines with a ``close()`` method; the lines are
            written to a file opened in binary mode.
        pool: sequence of tuples. Items 0-2 are passed to ``Read`` (item 1 is
            incremented by one); items 3-7 are the expected values.
        refLens: mapping indexed by chromosome id.
    """
    # mkstemp returns an already-open descriptor; reuse it for writing so it
    # is closed instead of leaked.
    fd, tsvName = tempfile.mkstemp('.tsv')
    tmpName = tsvName + '.gz'
    try:
        with os.fdopen(fd, 'wb') as tmpfp:
            for line in modFile:
                tmpfp.write(line)
        pysam.tabix_index(tsvName, force=True, seq_col=1, start_col=2, end_col=2,
                          meta_char='#', zerobased=True)
        modFile.close()

        self.chromoID = '1'
        self.modobj = mod.Mod(tmpName)
        self.modobj.load(self.chromoID)

        # Build the read list once; it is also defined when pool is empty.
        bamIter = [Read(tup[0], tup[1]+1, tup[2]) for tup in pool]

        a = annot.Annotator(self.chromoID, refLens[self.chromoID],
                            self.modobj, bamIter)
        results = a.execute()

        for i, res in enumerate(results):
            self.assertEqual(polish(res[0]), pool[i][3])
            self.assertEqual(res[1], pool[i][4])
            self.assertEqual(res[2], pool[i][5])
            self.assertEqual(res[3], pool[i][6])
            self.assertEqual(res[4], pool[i][7])
    finally:
        # Remove whatever temporary files exist, also after a failed assertion.
        for path in (tsvName, tmpName, tmpName + '.tbi'):
            if os.path.exists(path):
                os.remove(path)

dbSNPDB.py 文件源码项目：CAVA 作者: RahmanTeam 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def indexFile(options):
    """Bgzip-compress ``options.output`` and tabix-index the ``.gz`` file.

    Progress is reported on standard output; each step's message is flushed
    before the step runs and followed by ``OK`` afterwards.

    Args:
        options: object whose ``output`` attribute is the file to compress.
    """
    out = sys.stdout

    def announce(message):
        # Flush so the message is visible while the step is still running.
        out.write(message)
        out.flush()

    announce('Compressing output file ... ')
    pysam.tabix_compress(options.output, options.output + '.gz', force=True)
    out.write('OK\n')

    announce('Indexing output file ... ')
    column_layout = dict(seq_col=1, start_col=2, end_col=2)
    pysam.tabix_index(options.output + '.gz', meta_char='#', force=True,
                      **column_layout)
    out.write('OK\n')

# Read records from file as a list

main.py 文件源码项目：CAVA 作者: RahmanTeam 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def indexFile(options):
    """Bgzip-compress ``options.output`` and tabix-index the ``.gz`` file.

    Progress is reported on standard output; each step's message is flushed
    before the step runs and followed by ``OK`` afterwards.

    Args:
        options: object whose ``output`` attribute is the file to compress.
    """
    out = sys.stdout

    def announce(message):
        # Flush so the message is visible while the step is still running.
        out.write(message)
        out.flush()

    announce('Compressing output file... ')
    pysam.tabix_compress(options.output, options.output + '.gz', force=True)
    out.write('OK\n')

    announce('Indexing output file... ')
    column_layout = dict(seq_col=4, start_col=6, end_col=7)
    pysam.tabix_index(options.output + '.gz', meta_char='#', force=True,
                      **column_layout)
    out.write('OK\n')


# Check if string is a number (integer)

merge.py 文件源码项目：pynnotator 作者: raonyguimaraes 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def __init__(self, vcffile=None):

        self.vcffile = vcffile

        self.filename = os.path.splitext(os.path.basename(str(vcffile)))[0]

        # create folder merge if it doesn't exists
        if not os.path.exists('merge'):
            os.makedirs('merge')
        # enter inside folder
        os.chdir('merge')

        self.annotation_files = OrderedDict()

        pysam.tabix_index('../snpeff/snpeff.output.vcf', preset='vcf')

        self.annotation_files['snpeff'] = {
            'info': 'EFF',
            'file': pysam.Tabixfile('../snpeff/snpeff.output.vcf.gz', 'r', encoding="utf-8")
        }

        pysam.tabix_index('../vep/vep.output.sorted.vcf', preset='vcf')

        self.annotation_files['vep'] = {
            'info': 'CSQ',
            'file': pysam.Tabixfile('../vep/vep.output.sorted.vcf.gz', 'r', encoding="utf-8")
        }

        pysam.tabix_index('../snpsift/snpsift.final.vcf', preset='vcf')

        self.annotation_files['vartype'] = {
            'info': 'VARTYPE,SNP,MNP,INS,DEL,MIXED,HOM,HET',
            'file': pysam.Tabixfile('../snpsift/snpsift.final.vcf.gz', 'r', encoding="utf-8")
        }

        pysam.tabix_index('../decipher/hi_predictions.vcf', preset='vcf')

        self.annotation_files['decipher'] = {
            'info': 'HI_PREDICTIONS',
            'file': pysam.Tabixfile('../decipher/hi_predictions.vcf.gz', 'r', encoding="utf-8")
        }

        pysam.tabix_index('../pynnotator/pynnotator.vcf', preset='vcf')

        # genomes1k dbsnp clinvar esp6500 ensembl_phen ensembl_clin
        self.pynnotator_tags = ['genomes1k', 'dbsnp', 'clinvar', 'esp6500', 'ensembl_phen', 'ensembl_clin']

        self.annotation_files['pynnotator'] = {
            'info': 'ALL',
            'file': pysam.Tabixfile('../pynnotator/pynnotator.vcf.gz', 'r', encoding="utf-8")
        }

        pysam.tabix_index('../func_pred/func_pred_sorted.vcf', preset='vcf')

        self.annotation_files['dbnfsp'] = {
            'info': 'dbNSFP_SIFT_score,dbNSFP_SIFT_converted_rankscore,dbNSFP_SIFT_pred,dbNSFP_Uniprot_acc_Polyphen2,dbNSFP_Uniprot_id_Polyphen2,dbNSFP_Uniprot_aapos_Polyphen2,dbNSFP_Polyphen2_HDIV_score,dbNSFP_Polyphen2_HDIV_rankscore,dbNSFP_Polyphen2_HDIV_pred,dbNSFP_Polyphen2_HVAR_score,dbNSFP_Polyphen2_HVAR_rankscore,dbNSFP_Polyphen2_HVAR_pred,dbNSFP_LRT_score,dbNSFP_LRT_converted_rankscore,dbNSFP_LRT_pred,dbNSFP_LRT_Omega,dbNSFP_MutationTaster_score,dbNSFP_MutationTaster_converted_rankscore,dbNSFP_MutationTaster_pred,dbNSFP_MutationTaster_model,dbNSFP_MutationTaster_AAE,dbNSFP_MutationAssessor_UniprotID,dbNSFP_MutationAssessor_variant,dbNSFP_MutationAssessor_score,dbNSFP_MutationAssessor_rankscore,dbNSFP_MutationAssessor_pred,dbNSFP_FATHMM_score,dbNSFP_FATHMM_converted_rankscore,dbNSFP_FATHMM_pred,dbNSFP_PROVEAN_score,dbNSFP_PROVEAN_converted_rankscore,dbNSFP_PROVEAN_pred,dbNSFP_Transcript_id_VEST3,dbNSFP_Transcript_var_VEST3,dbNSFP_VEST3_score,dbNSFP_VEST3_rankscore,dbNSFP_MetaSVM_score,dbNSFP_MetaSVM_rankscore,dbNSFP_MetaSVM_pred,dbNSFP_MetaLR_score,dbNSFP_MetaLR_rankscore,dbNSFP_MetaLR_pred,dbNSFP_Reliability_index,dbNSFP_M-CAP_score,dbNSFP_M-CAP_rankscore,dbNSFP_M-CAP_pred,dbNSFP_REVEL_score,dbNSFP_REVEL_rankscore,dbNSFP_MutPred_score,dbNSFP_MutPred_rankscore,dbNSFP_MutPred_protID,dbNSFP_MutPred_AAchange,dbNSFP_MutPred_Top5features,dbNSFP_CADD_raw,dbNSFP_CADD_raw_rankscore,dbNSFP_CADD_phred,dbNSFP_DANN_score,dbNSFP_DANN_rankscore,dbNSFP_fathmm-MKL_coding_score,dbNSFP_fathmm-MKL_coding_rankscore,dbNSFP_fathmm-MKL_coding_pred,dbNSFP_fathmm-MKL_coding_group,dbNSFP_Eigen_coding_or_noncoding,dbNSFP_Eigen-raw,dbNSFP_Eigen-phred,dbNSFP_Eigen-PC-raw,dbNSFP_Eigen-PC-phred,dbNSFP_Eigen-PC-raw_rankscore,dbNSFP_GenoCanyon_score,dbNSFP_GenoCanyon_score_rankscore,dbNSFP_integrated_fitCons_score,dbNSFP_integrated_fitCons_rankscore,dbNSFP_integrated_confidence_value,dbNSFP_GM12878_fitCons_score,dbNSFP_GM12878_fitCons_rankscore,dbNSFP_GM12878_confidence_value,dbNSFP_H1-hESC
_fitCons_score,dbNSFP_H1-hESC_fitCons_rankscore,dbNSFP_H1-hESC_confidence_value,dbNSFP_HUVEC_fitCons_score,dbNSFP_HUVEC_fitCons_rankscore,dbNSFP_clinvar_rs,dbNSFP_clinvar_clnsig,dbNSFP_clinvar_trait,dbNSFP_clinvar_golden_stars',
            'file': pysam.Tabixfile('../func_pred/func_pred_sorted.vcf.gz', 'r', encoding="utf-8")
        }

        self.dbsnp = pysam.Tabixfile(settings.dbsnp, 'r', encoding="utf-8")