def extract(self, text, max_length=3, metric='avg', incl_scores=False):
    """Extract keywords and keyphrases from ``text``, highest score first.

    The text is split into sentences with ``nltk.sent_tokenize``, candidate
    phrases are generated from those sentences, word scores and then phrase
    scores are computed, and the phrases are ranked by descending score.
    Only the first ``int(n_phrases / self.top_fraction)`` ranked entries
    are returned.

    Args:
        text: Input text to analyse.
        max_length: Forwarded to ``_generate_candidate_keywords`` as its
            ``max_length`` argument (presumably the maximum number of
            words in a candidate phrase -- confirm against that helper).
        metric: Forwarded to ``_calculate_phrase_scores`` as its
            ``metric`` argument.
        incl_scores: If true, return ``(phrase, score)`` pairs; otherwise
            return only the phrases.

    Returns:
        A list ordered by descending score: ``(phrase, score)`` tuples
        when ``incl_scores`` is true, otherwise just the phrases.
    """
    sentences = nltk.sent_tokenize(text)
    phrase_list = self._generate_candidate_keywords(sentences, max_length=max_length)
    word_scores = self._calculate_word_scores(phrase_list)
    phrase_scores = self._calculate_phrase_scores(phrase_list, word_scores, metric=metric)

    # ``items()`` instead of ``iteritems()``: the latter is not available on
    # Python 3 dicts, while ``items()`` works on both Python 2 and 3.
    ranked = sorted(phrase_scores.items(), key=operator.itemgetter(1), reverse=True)

    # Keep only the leading portion of the ranking, as configured on the instance.
    top = ranked[:int(len(ranked) / self.top_fraction)]

    if incl_scores:
        return top
    # A list comprehension (not ``map``) so callers always get a real list,
    # even on Python 3 where ``map`` returns a lazy iterator.
    return [entry[0] for entry in top]
评论列表
文章目录