def keyphrases(self, N=20, fileids=None, categories=None):
    """
    Yields the top N keyphrases for each scored document, grouped by
    document id.

    Documents are visited in the order of ``self.tfidfs``; the document at
    position ``i`` is reported under ``self.fileids[i]``.

    :param N: maximum number of (phrase, score) pairs kept per document.
    :param fileids: optional file id, or iterable of file ids, restricting
        which documents are yielded. ``None`` (the default) yields every
        document.
    :param categories: accepted for interface compatibility but currently
        ignored. TODO: filter by category as well.

    :returns: a generator of ``(fileid, scores)`` tuples, where ``scores``
        is a list of ``(self.lexicon[wid], score)`` pairs ordered from the
        highest score to the lowest.

    :raises ValueError: if ``self.tfidfs``, ``self.lexicon`` or
        ``self.fileids`` is empty or unset. Because this is a generator,
        the error surfaces when iteration starts, not at call time.
    """
    if not self.tfidfs or not self.lexicon or not self.fileids:
        raise ValueError("Must call the score method first!")

    # Normalise the requested file ids into a set for O(1) membership
    # tests; a bare string is treated as one id, not as an iterable of
    # characters.
    if fileids is None:
        wanted = None
    elif isinstance(fileids, str):
        wanted = {fileids}
    else:
        wanted = set(fileids)

    by_score = itemgetter(1)  # each doc entry is a (wid, score) pair

    for idx, doc in enumerate(self.tfidfs):
        fileid = self.fileids[idx]
        if wanted is not None and fileid not in wanted:
            continue

        # Get the top N terms by TF-IDF score
        scores = [
            (self.lexicon[wid], score)
            for wid, score in heapq.nlargest(N, doc, key=by_score)
        ]
        yield fileid, scores
评论列表
文章目录