keyphrase.py 文件源码

python
阅读 42 收藏 0 点赞 0 评论 0

项目:minke 作者: DistrictDataLabs 项目源码 文件源码
def keyphrases(self, N=20, fileids=None, categories=None):
        """
        Yield the top N keyphrases for each scored document.

        This is a generator of ``(fileid, scores)`` pairs, produced in the
        order of ``self.tfidfs``. ``scores`` is a list of at most N
        ``(term, score)`` tuples ordered from highest to lowest score, where
        each term is looked up in ``self.lexicon`` by its word id.

        Parameters
        ----------
        N : int
            Maximum number of terms to return per document.
        fileids : str or iterable of str, optional
            When given, only documents whose file id is listed are yielded.
            A single string is treated as one file id. ``None`` (the
            default) yields every document.
        categories : optional
            Accepted for interface compatibility but still ignored.
            TODO: resolve categories to file ids once the corpus is
            reachable from here.

        Raises
        ------
        ValueError
            If ``self.tfidfs``, ``self.lexicon`` or ``self.fileids`` is
            unset or empty. Because this is a generator, the error surfaces
            when iteration starts, not when the method is called.
        """
        if not self.tfidfs or not self.lexicon or not self.fileids:
            raise ValueError("Must call the score method first!")

        # Normalise the optional filter into a set for O(1) membership
        # tests; a bare string must not be split into characters.
        if fileids is None:
            wanted = None
        elif isinstance(fileids, str):
            wanted = {fileids}
        else:
            wanted = set(fileids)

        # Hoisted: the same key function is reused for every document.
        by_score = itemgetter(1)

        for idx, doc in enumerate(self.tfidfs):
            # self.fileids is positionally aligned with self.tfidfs.
            fileid = self.fileids[idx]
            if wanted is not None and fileid not in wanted:
                continue

            # Get the top N terms by TF-IDF score, translating each word id
            # back to its term through the lexicon.
            scores = [
                (self.lexicon[wid], score)
                for wid, score in heapq.nlargest(N, doc, key=by_score)
            ]

            yield fileid, scores
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号