def extract_nuggets(sentences, nugget_type, language):
    """Collect nuggets from every sentence as space-joined strings.

    Parameters:
        sentences: iterable of sentence strings, e.g.
            ['New York is a city.', 'It has a huge population.']
        nugget_type: which kind of nugget to extract. One of:
            'n-grams' -- bigrams over the tokens produced by
                         ``sent2stokens(sent, language)``
            'NP', 'Phrases', 'NE' -- whatever ``get_phrases(sent, nugget_type)``
                         yields for that sentence
        language: forwarded to ``sent2stokens``; only used for 'n-grams'.

    Returns:
        A flat list of strings, one per nugget, in sentence order. Each
        nugget is joined with a single space, so a bigram such as
        ('huge', 'population') becomes 'huge population'.

    Raises:
        ValueError: if ``nugget_type`` is not one of the supported values
            and there is at least one sentence to process.
    """
    # NOTE(review): get_phrases is assumed to yield token sequences (not
    # plain strings), since each item is passed to ' '.join -- confirm.
    phrase_types = ('NP', 'Phrases', 'NE')

    nugget_list = []
    for sent in sentences:
        if nugget_type == 'n-grams':
            nugget_items = ngrams(sent2stokens(sent, language), 2)
        elif nugget_type in phrase_types:
            nugget_items = get_phrases(sent, nugget_type)
        else:
            # Previously this fell through with nugget_items unbound and
            # failed with an opaque UnboundLocalError.
            raise ValueError(
                "Unsupported nugget_type %r; expected one of %r"
                % (nugget_type, ('n-grams',) + phrase_types)
            )
        nugget_list.extend(' '.join(nugget) for nugget in nugget_items)
    return nugget_list
data_helpers.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录