lsa.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:nlp_sum 作者: Zhujunnan 项目源码 文件源码
def __call__(self, documentSet, words_limit, method="mmr", metric="tf", summary_order="origin"):
    """Select summary sentences from ``documentSet`` using an SVD of its term matrix.

    Args:
        documentSet: Object exposing ``sentences``; it is also handed to the
            dictionary/matrix builders of this class.
        words_limit: Length budget forwarded unchanged to the sentence
            selection helper.
        method: Sentence selection strategy, case-insensitive. ``"default"``
            uses ``_get_best_sentences``; ``"mmr"`` uses
            ``_get_best_sentences_by_MMR``.
        metric: Term weighting, case-insensitive. ``"tf"`` builds the matrix
            with ``_create_matrix`` + ``_compute_term_frequency``;
            ``"tfidf"`` builds it with ``_create_tfidf_matrix``.
        summary_order: Stored on ``self.summary_order``; not otherwise used
            in this method.

    Returns:
        An empty tuple when the dictionary built from ``documentSet`` is
        empty; otherwise whatever the chosen selection helper returns.

    Raises:
        ValueError: If ``metric`` or ``method`` is not one of the supported
            names.
    """
    dictionary = self._create_dictionary(documentSet)
    self.summary_order = summary_order
    # Empty document: nothing to summarize (options are not validated here,
    # matching the long-standing behaviour of this early exit).
    if not dictionary:
        return ()

    # Validate both options before doing any matrix work, so that a typo in
    # ``method`` fails loudly instead of silently returning None after a
    # wasted SVD. Metric is checked first to keep the original precedence.
    metric_name = metric.lower()
    if metric_name not in ("tf", "tfidf"):
        raise ValueError("Don't support your metric now.")
    method_name = method.lower()
    if method_name not in ("default", "mmr"):
        raise ValueError("Don't support your method now.")

    if metric_name == "tf":
        matrix = self._create_matrix(documentSet, dictionary)
        matrix = self._compute_term_frequency(matrix)
    else:
        matrix = self._create_tfidf_matrix(documentSet, dictionary)

    # Only the singular values and the right-hand factor feed the ranking.
    _, sigma, v = svd(matrix, full_matrices=False)
    ranks = iter(self._compute_ranks(sigma, v))

    # The rating callback ignores its argument and hands out ranks in
    # sequence. NOTE(review): this presumably relies on the helpers rating
    # each sentence exactly once, in ``documentSet.sentences`` order -- confirm.
    def rating(sent):
        return next(ranks)

    if method_name == "default":
        return self._get_best_sentences(documentSet.sentences, words_limit, rating)
    return self._get_best_sentences_by_MMR(documentSet.sentences, words_limit,
                                           matrix, rating)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号