def _generate_candidate_keywords(self, sentences, max_length=3):
    """Create a list of candidate keyword phrases from a set of sentences.

    Each sentence is lowercased and tokenized with ``nltk.word_tokenize``.
    A token that is in ``self.stopwords``, is the literal ``"|"``, or for
    which ``is_punctuation`` returns a true value acts as a phrase boundary.
    Every run of consecutive non-boundary tokens forms one candidate phrase.
    Runs longer than ``max_length`` words are discarded, not truncated.

    Args:
        sentences: Iterable of sentence strings.
        max_length: Maximum number of words a candidate phrase may contain.

    Returns:
        A list of phrases in order of occurrence; each phrase is a list of
        lowercased word tokens.
    """
    phrase_list = []
    for sentence in sentences:
        phrase = []
        for word in nltk.word_tokenize(sentence.lower()):
            # The "|" comparison is kept so a literal pipe token is still a
            # boundary even if is_punctuation does not classify it as one.
            is_boundary = (
                word in self.stopwords or word == "|" or is_punctuation(word)
            )
            if not is_boundary:
                phrase.append(word)
                continue
            if phrase and len(phrase) <= max_length:
                phrase_list.append(phrase)
            phrase = []
        # Flush the phrase still pending at the end of the sentence; without
        # this, a sentence that ends on a content word (no trailing
        # punctuation or stopword) would silently lose its last candidate.
        if phrase and len(phrase) <= max_length:
            phrase_list.append(phrase)
    return phrase_list
评论列表
文章目录