def extract_ner(sentences, ignore_entities=get_ignore_entities()):
    """Tag each sentence with the NER service and collect LOCATION/PERSON entities.

    Args:
        sentences: Iterable of ``(sentence_text, file)`` pairs. ``file`` is
            passed through untouched into the result.
        ignore_entities: Container of lower-cased entity strings to drop.
            The default is computed once, when this function is defined,
            by calling ``get_ignore_entities()``.

    Returns:
        A list with one dict per sentence that produced at least one kept
        entity, shaped as
        ``{'text': {'text': <sentence>, 'file': <file>}, 'entities': [<str>, ...]}``.
        Sentences with no kept entities are omitted.

    Raises:
        Exception: Any error raised while connecting to or querying the
            tagger is logged and re-raised unchanged.
    """
    wanted_tags = ('LOCATION', 'PERSON')
    try:
        # NOTE(review): Ner looks like a network client for a Stanford NER
        # server -- confirm against the file's imports.
        tagger = Ner(host=get_ner_host(), port=get_ner_port())
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        started = time.perf_counter()
        extraction = []
        for s, source_file in sentences:
            # Line breaks are flattened before tagging; the stored sentence
            # below keeps the original, unmodified text.
            output = tagger.get_entities(s.replace('\n', ' ').replace('\r', ''))
            entities_found = [
                text for text, tag in output
                if tag in wanted_tags and text.lower() not in ignore_entities
            ]
            if entities_found:
                extraction.append({
                    'text': {'text': s, 'file': source_file},
                    'entities': entities_found,
                })
        # perf_counter() returns seconds; convert so the "ms" label is true.
        elapsed_ms = (time.perf_counter() - started) * 1000
        logger.info('NER extraction took {time}ms'.format(time=elapsed_ms))
        return extraction
    except Exception as detail:
        logger.error('Error during ner extraction {}'.format(detail))
        raise
评论列表
文章目录