def extract_candidate_words(sents, tags=GOODTAGS, tagged=False, **kwargs):
    """
    Yield normalized key words whose part-of-speech tag is in ``tags``.

    Every item of ``sents`` is tokenized with ``nltk.wordpunct_tokenize`` and
    tagged with ``nltk.pos_tag``, unless ``tagged=True`` is passed, in which
    case each sentence is iterated directly as (token, tag) pairs.

    Any extra keyword arguments are forwarded to the ``Normalizer``
    constructor.
    """
    normalizer = Normalizer(**kwargs)

    for sent in sents:
        # Raw sentences get tokenized and tagged; pre-tagged ones are used as-is
        pairs = sent if tagged else nltk.pos_tag(nltk.wordpunct_tokenize(sent))

        for word, pos in pairs:
            # Skip every word whose tag is not among the accepted ones
            if pos not in tags:
                continue

            # normalize() is handed a one-element list; whatever it
            # returns for that word is yielded item by item
            for normalized in normalizer.normalize([word]):
                yield normalized
##########################################################################
## Key phrase by text scoring mechanisms
##########################################################################
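# [web page residue, not part of the source] Comment list
# [web page residue, not part of the source] Table of contents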