def get_tmhmm_predictions(self, tmhmm_results, custom_gene_mapping=None):
    """Parse pre-run TMHMM results and store them in the representative sequences.

    This is a basic function to parse pre-run TMHMM results. Run TMHMM from the
    web service (http://www.cbs.dtu.dk/services/TMHMM/) by doing the following:

        1. Write all representative sequences in the GEM-PRO using the function
           ``write_representative_sequences_file``
        2. Upload the file to http://www.cbs.dtu.dk/services/TMHMM/ and choose
           "Extensive, no graphics" as the output
        3. Copy and paste the results (ignoring the top header and above
           "HELP with output formats") into a file and save it
        4. Run this function on that file

    For every gene with a representative sequence whose ID is found in the parsed
    results, two entries are written on ``g.protein.representative_sequence``:

        - ``annotations['num_tm_helix-tmhmm']``: the ``'num_tm_helices'`` value
        - ``letter_annotations['TM-tmhmm']``: the ``'sequence'`` value

    Genes with no entry (or an empty entry) in the results are reported with an
    error log message and skipped; they do not stop the remaining genes from
    being processed.

    Args:
        tmhmm_results (str): Path to TMHMM results (long format)
        custom_gene_mapping (dict): Default parsing of TMHMM output is to look for the model gene IDs. If
            your output file contains IDs which differ from the model gene IDs, use this dictionary to map model
            gene IDs to result file IDs. Dictionary keys must match model genes.

    Raises:
        KeyError: If ``custom_gene_mapping`` is provided (and non-empty) but does not
            contain the ID of a gene that has a representative sequence.

    """
    # TODO: refactor to Protein class?
    # The parsed results are used below as a mapping of result-file ID -> per-protein
    # results exposing the 'num_tm_helices' and 'sequence' keys.
    tmhmm_dict = ssbio.protein.sequence.properties.tmhmm.parse_tmhmm_long(tmhmm_results)

    counter = 0
    for g in tqdm(self.genes_with_a_representative_sequence):
        # Translate the model gene ID into the ID used in the results file, if requested.
        # An empty/None mapping means the model gene IDs are used as-is.
        if custom_gene_mapping:
            g_id = custom_gene_mapping[g.id]
        else:
            g_id = g.id

        # Skip genes without usable results. Previously an empty entry was logged as
        # missing but then indexed anyway, which raised and aborted the whole loop.
        if g_id not in tmhmm_dict or not tmhmm_dict[g_id]:
            log.error("{}: missing TMHMM results".format(g.id))
            continue

        log.debug('{}: loading TMHMM results'.format(g.id))
        gene_results = tmhmm_dict[g_id]
        repseq = g.protein.representative_sequence
        repseq.annotations['num_tm_helix-tmhmm'] = gene_results['num_tm_helices']
        repseq.letter_annotations['TM-tmhmm'] = gene_results['sequence']
        counter += 1

    # NOTE(review): the denominator is all model genes, not only those with a
    # representative sequence -- kept as in the original message.
    log.info('{}/{}: number of genes with TMHMM predictions loaded'.format(counter, len(self.genes)))
### END SEQUENCE RELATED METHODS ###
####################################################################################################################
####################################################################################################################
### STRUCTURE RELATED METHODS ###
评论列表
文章目录