clusters.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:idealoom 作者: conversence 项目源码 文件源码
def tfidf_model(self):
        """Return the TF-IDF model for this discussion, creating it on first use.

        The model is memoized in ``self._tfidf_model``. On a cache miss it is
        loaded from ``tfidf_<discussion id>.model`` under ``self.dirname``
        when that file exists and its ``num_docs`` matches the current post
        count; otherwise it is (re)initialized from ``self.subcorpus`` and
        saved back to that file.

        Returns ``None`` (without memoizing anything) when there are fewer
        than 10 posts.
        """
        # Fast path: the model was already built or loaded earlier.
        if self._tfidf_model is not None:
            return self._tfidf_model

        expected_docs = self.post_ids_query.count()
        if expected_docs < 10:
            return None

        id2word = self.dictionary
        model = gmodels.TfidfModel(id2word=id2word)
        model_path = join(
            self.dirname, "tfidf_%d.model" % (self.discussion.id,))
        corpus = self.subcorpus

        if exists(model_path):
            model = model.load(model_path)
            # The document count is used as a cheap proxy for "same corpus".
            # That is known to be wrong in corner cases (hidden posts, etc.).
            if model.num_docs != expected_docs:
                # Stale cache: drop the file and start from an empty model.
                unlink(model_path)
                model = gmodels.TfidfModel(id2word=id2word)

        # Either no usable cache file was found or it was just discarded:
        # compute the statistics from the corpus and persist them.
        if model.num_docs != expected_docs:
            model.initialize(corpus)
            model.save(model_path)

        self._tfidf_model = model
        return self._tfidf_model
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号