def extract(sentences, ignore_entities=get_ignore_entities()):
    """Run CoreNLP NER over each text and collect LOCATION/PERSON tokens.

    Args:
        sentences: Iterable of text strings; each one is sent to the CoreNLP
            server in its own ``annotate`` request.
        ignore_entities: Container of lower-cased token texts to skip.
            NOTE: the default is evaluated once, when this function is
            defined, not on every call.

    Returns:
        A list of ``{'text': s, 'entities': [...]}`` dicts, one for each
        input text in which at least one matching token was found, in input
        order. ``entities`` holds the ``originalText`` of every token tagged
        LOCATION or PERSON, in token order.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    tic = time.perf_counter()
    nlp = pycorenlp.corenlp.StanfordCoreNLP("http://{0}:{1}/".format(get_ner_host(), get_ner_port()))
    wanted_tags = ('LOCATION', 'PERSON')
    extraction = []
    for s in sentences:
        output = nlp.annotate(s, properties={"annotators": "ner", "outputFormat": "json"})
        # Walk every sentence the server returned (the input may have been
        # split into several), and visit each token exactly once.
        entities_found = [
            token['originalText']
            for sentence in output["sentences"]
            for token in sentence["tokens"]
            if token['ner'] in wanted_tags
            and token['originalText'].lower() not in ignore_entities
        ]
        if entities_found:
            extraction.append({'text': s, 'entities': entities_found})
    tac = time.perf_counter()
    # perf_counter() returns seconds; convert so the "ms" label is accurate.
    logger.info('NER extraction took {time}ms'.format(time=(tac - tic) * 1000))
    return extraction
# Perform natural language processing on the text to get annotated entities and entity relations
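# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)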