gempro.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:ssbio 作者: SBRG 项目源码 文件源码
def uniprot_mapping_and_metadata(self, model_gene_source, custom_gene_mapping=None, outdir=None,
                                     set_as_representative=False, force_rerun=False):
        """Map all genes in the model to UniProt IDs using the UniProt mapping service.
        Also download all metadata and sequences.

        Args:
            model_gene_source (str): the database source of your model gene IDs.
                See: http://www.uniprot.org/help/api_idmapping
                Common model gene sources are:

                * Ensembl Genomes - ``ENSEMBLGENOME_ID`` (i.e. E. coli b-numbers)
                * Entrez Gene (GeneID) - ``P_ENTREZGENEID``
                * RefSeq Protein - ``P_REFSEQ_AC``

            custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
                custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
                Dictionary keys must match model genes.
            outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
                were not created initially
            set_as_representative (bool): If mapped UniProt IDs should be set as representative sequences
            force_rerun (bool): If you want to overwrite any existing mappings and files

        """

        # Allow model gene --> custom ID mapping ({'TM_1012':'TM1012'})
        if custom_gene_mapping:
            genes_to_map = list(custom_gene_mapping.values())
        else:
            genes_to_map = [x.id for x in self.genes]

        # Map all IDs first to available UniProts
        genes_to_uniprots = bs_unip.mapping(fr=model_gene_source, to='ACC', query=genes_to_map)

        successfully_mapped_counter = 0
        for g in tqdm(self.genes):
            if custom_gene_mapping and g.id in custom_gene_mapping.keys():
                uniprot_gene = custom_gene_mapping[g.id]
            else:
                uniprot_gene = g.id

            if uniprot_gene not in list(genes_to_uniprots.keys()):
                log.debug('{}: unable to map to UniProt'.format(g.id))
            else:
                for mapped_uniprot in genes_to_uniprots[uniprot_gene]:
                    try:
                        uniprot_prop = g.protein.load_uniprot(uniprot_id=mapped_uniprot, download=True, outdir=outdir,
                                                              set_as_representative=set_as_representative,
                                                              force_rerun=force_rerun)
                    except HTTPError as e:
                        log.error('{}, {}: unable to complete web request'.format(g.id, mapped_uniprot))
                        print(e)
                        continue

                    if uniprot_prop.sequence_file or uniprot_prop.metadata_file:
                        successfully_mapped_counter += 1

        log.info('{}/{}: number of genes mapped to UniProt'.format(successfully_mapped_counter, len(self.genes)))
        log.info('Completed ID mapping --> UniProt. See the "df_uniprot_metadata" attribute for a summary dataframe.')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号