def extract_entities(text, json_=None):
    """
    Build a json-like dict describing the concepts and entities found in text.

    The text is split into sentences, concepts are extracted from the whole
    text with `mmap_extract`, and entities are extracted sentence by sentence
    with `metamap_ents`, so each entity stays associated with the id of the
    sentence it came from. For the time being, both the concepts and the
    entities related to them are preserved.

    Input:
        - text: str,
        a piece of text or sentence
        - json_: dic or None,
        an existing json to be enriched in place. When omitted (None), a
        fresh dict is created for this call, so results of separate calls
        never share state.
    Output:
        - json_: dic,
        json with the following fields:
            'text':     the original text
            'sents':    list of {'sent_id': int, 'sent_text': str}
            'concepts': first element of the pair returned by mmap_extract(text)
            'entities': dict mapping sent_id to the result of metamap_ents
                        for that sentence
    """
    # A mutable default ({}) would be shared across every call that omits
    # json_; create a new dict per call instead.
    if json_ is None:
        json_ = {}
    json_['text'] = text
    # Tokenize the text into sentences
    # (sent_tokenize is presumably NLTK's sentence tokenizer -- confirm).
    sents = sent_tokenize(text)
    json_['sents'] = [
        {'sent_id': i, 'sent_text': sent} for i, sent in enumerate(sents)
    ]
    # mmap_extract returns a pair; only its first element is kept.
    json_['concepts'], _ = mmap_extract(text)
    json_['entities'] = {}
    for sent in json_['sents']:
        # metamap_ents receives the whole sentence record (id + text),
        # not just the raw sentence string.
        json_['entities'][sent['sent_id']] = metamap_ents(sent)
    return json_
评论列表
文章目录