geocoder.py 文件源码-python代码片段

geocoder.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

项目：geocoder-ie 作者: devgateway 项目源码文件源码

def extract(sentences, ignore_entities=get_ignore_entities()):
    """Run CoreNLP named-entity recognition over sentences and collect entity tokens.

    Each item of ``sentences`` is sent to the Stanford CoreNLP server located at
    ``get_ner_host()``/``get_ner_port()`` with the ``ner`` annotator. Tokens tagged
    ``LOCATION`` or ``PERSON`` are kept unless their lower-cased text is found in
    ``ignore_entities``.

    Args:
        sentences: Iterable of text fragments; each one is annotated separately.
        ignore_entities: Container of lower-case strings to exclude. The default
            is the result of ``get_ignore_entities()``, evaluated once when this
            function is defined (not on every call).

    Returns:
        A list of ``{'text': <input sentence>, 'entities': [<token text>, ...]}``
        dicts, one per input sentence that produced at least one entity.
        Sentences with no matching tokens are omitted.
    """
    # perf_counter measures wall-clock time (this function mostly waits on HTTP),
    # and unlike time.clock() it still exists on Python 3.8+.
    tic = time.perf_counter()
    nlp = pycorenlp.corenlp.StanfordCoreNLP("http://{0}:{1}/".format(get_ner_host(), get_ner_port()))
    # NOTE(review): PERSON is accepted alongside LOCATION — presumably because
    # place names are sometimes tagged as people; confirm with the pipeline owner.
    wanted_tags = ('LOCATION', 'PERSON')
    extraction = []

    for s in sentences:
        output = nlp.annotate(s, properties={"annotators": "ner", "outputFormat": "json"})
        # CoreNLP may split one input into several sentences (or none, for empty
        # text), so walk every returned sentence instead of only the first.
        locations_found = [
            t['originalText']
            for annotated in output["sentences"]
            for t in annotated["tokens"]
            if t['ner'] in wanted_tags and t['originalText'].lower() not in ignore_entities
        ]
        if locations_found:
            extraction.append({'text': s, 'entities': locations_found})

    tac = time.perf_counter()
    # perf_counter returns seconds; the message reports milliseconds.
    logger.info('NER extraction took {time}ms'.format(time=(tac - tic) * 1000))
    return extraction


# Perform natural language processing on text; get annotated entities and entity relations