def metamap_wrapper(text):
    """
    Function-wrapper for metamap binary. Extracts concepts
    found in text.

    !!!! REMEMBER TO START THE METAMAP TAGGER AND
        WordSense DISAMBIGUATION SERVER !!!!

    Input:
        - text: str,
        a piece of text or sentence
    Output:
        - a dictionary with two keys:
            'sents': list with one dictionary per tokenized sentence,
            in sentence order, each holding
                'sent_id': 1-based position of the sentence,
                'entities': list of dictionaries with the keys
                'label', 'cui', 'sem_types' and 'score',
                'relations': an empty list (not filled in here)
            'sent_text': the input text, unchanged
    """
    # Nothing to annotate: skip tokenization and the external MetaMap
    # call, and return the same structure an empty run would produce.
    if not text:
        return {'sents': [], 'sent_text': text}
    # Tokenize into sentences
    sents = sent_tokenize(text)
    if not sents:
        return {'sents': [], 'sent_text': text}
    # Load Metamap Instance
    mm = MetaMap.get_instance(settings['load']['path']['metamap'])
    # Sentences are submitted with 0-based ids so that every returned
    # concept can be traced back to the sentence it came from.
    concepts, errors = mm.extract_concepts(sents, range(len(sents)))
    # Group the concepts by sentence id in a single pass (keeps the order
    # in which they were returned) instead of rescanning the whole
    # concept list once per sentence.
    concepts_by_sent = {}
    for concept in concepts:
        concepts_by_sent.setdefault(int(concept[0]), []).append(concept)
    sentences = []
    for idx in range(len(sents)):
        # Only records exposing a `cui` attribute are kept as entities.
        entities = [
            {'label': conc.preferred_name, 'cui': conc.cui,
             'sem_types': conc.semtypes, 'score': conc.score}
            for conc in concepts_by_sent.get(idx, [])
            if hasattr(conc, 'cui')
        ]
        # Reported sentence ids are 1-based, MetaMap ids are 0-based.
        sentences.append({'sent_id': idx + 1, 'entities': entities,
                          'relations': []})
    if errors:
        time_log('Errors with extracting concepts!')
        time_log(errors)
    return {'sents': sentences, 'sent_text': text}
评论列表
文章目录