def uniprot_mapping_and_metadata(self, model_gene_source, custom_gene_mapping=None, outdir=None,
                                 set_as_representative=False, force_rerun=False):
    """Map all genes in the model to UniProt IDs using the UniProt mapping service.

    Also download all metadata and sequences for every UniProt entry that a
    gene maps to. Nothing is returned; results are attached to each gene's
    protein via ``g.protein.load_uniprot`` and a summary is written to the log.

    Args:
        model_gene_source (str): the database source of your model gene IDs.
            See: http://www.uniprot.org/help/api_idmapping
            Common model gene sources are:

            * Ensembl Genomes - ``ENSEMBLGENOME_ID`` (i.e. E. coli b-numbers)
            * Entrez Gene (GeneID) - ``P_ENTREZGENEID``
            * RefSeq Protein - ``P_REFSEQ_AC``

        custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
            custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
            Dictionary keys must match model genes. Genes without an entry are queried
            under their own model ID.
        outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
            were not created initially
        set_as_representative (bool): If mapped UniProt IDs should be set as representative sequences
        force_rerun (bool): If you want to overwrite any existing mappings and files

    """
    # Allow model gene --> custom ID mapping ({'TM_1012':'TM1012'}).
    # Resolve the ID to query for every gene once, so the IDs sent to the
    # mapping service are exactly the IDs looked up in the loop below.
    id_overrides = custom_gene_mapping or {}
    gene_queries = [(g, id_overrides.get(g.id, g.id)) for g in self.genes]
    genes_to_map = [query_id for _, query_id in gene_queries]

    # Map all IDs first to available UniProts.
    # NOTE(review): the result is used as a dict-like of query ID -> iterable of
    # UniProt accessions; confirm bs_unip.mapping cannot return something else
    # (e.g. an error code) on a failed request.
    genes_to_uniprots = bs_unip.mapping(fr=model_gene_source, to='ACC', query=genes_to_map)

    successfully_mapped_counter = 0
    for g, uniprot_gene in tqdm(gene_queries):
        if uniprot_gene not in genes_to_uniprots:
            log.debug('{}: unable to map to UniProt'.format(g.id))
            continue

        # A gene may map to several UniProt entries; it counts as mapped once
        # if at least one of them yielded a sequence or metadata file.
        gene_mapped = False
        for mapped_uniprot in genes_to_uniprots[uniprot_gene]:
            try:
                uniprot_prop = g.protein.load_uniprot(uniprot_id=mapped_uniprot, download=True, outdir=outdir,
                                                      set_as_representative=set_as_representative,
                                                      force_rerun=force_rerun)
            except HTTPError as e:
                # Best effort: report the failed request and try the next entry.
                log.error('{}, {}: unable to complete web request'.format(g.id, mapped_uniprot))
                log.error(e)
                continue

            if uniprot_prop.sequence_file or uniprot_prop.metadata_file:
                gene_mapped = True

        if gene_mapped:
            successfully_mapped_counter += 1

    log.info('{}/{}: number of genes mapped to UniProt'.format(successfully_mapped_counter, len(self.genes)))
    log.info('Completed ID mapping --> UniProt. See the "df_uniprot_metadata" attribute for a summary dataframe.')
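# [web page residue, not part of the source] Comment list
# [web page residue, not part of the source] Table of contents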