def get_NN_entities(post):
    """Extract lower-cased noun entities from a piece of text.

    The text is split into sentences, each sentence is tokenized and
    part-of-speech tagged with NLTK, and the tagged tokens of all sentences
    are flattened into one sequence. Runs of consecutive tokens that carry
    the *same* noun tag (a tag starting with ``'NN'``) are joined with a
    single space into one entity. Any change of tag -- including a switch
    from one noun tag to a different noun tag -- ends the current run.

    Args:
        post: The raw text to analyse.

    Returns:
        A list of entity strings in order of appearance. Duplicates are
        kept, so callers can do frequency analysis on the result.
    """
    sentences = nltk.tokenize.sent_tokenize(post)
    pos_tagged_tokens = [
        tagged
        for sentence in sentences
        for tagged in nltk.pos_tag(nltk.tokenize.word_tokenize(sentence))
    ]

    all_entities = []
    current_entities = []
    previous_pos = None
    for entity, pos in pos_tagged_tokens:
        if pos.startswith('NN'):
            if pos == previous_pos:
                # Same noun tag as the previous token: extend the run.
                current_entities.append(entity.lower())
            else:
                # A new noun run starts here; emit the pending one first.
                if current_entities:
                    all_entities.append(' '.join(current_entities))
                current_entities = [entity.lower()]
        # Tracked for every token so that a non-noun breaks a run.
        previous_pos = pos

    # Runs are only emitted when the next one starts, so the last run has
    # to be flushed explicitly or it would be silently dropped.
    if current_entities:
        all_entities.append(' '.join(current_entities))
    return all_entities
reddit_NN_entities.py 文件源码
python
阅读 35
收藏 0
点赞 0
评论 0
评论列表
文章目录