lda.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:Sentences-analysis 作者: sungminoh 项目源码 文件源码
def build_corpus(self, fname=None, save_to=None):
        # read sentences file
        if not fname:
            fname = click.prompt('sentences file')
        fname = self.__dest(fname)
        assert os.path.isfile(fname), 'No such file: %s' % fname
        if save_to:
            self.corpus_fname = self.__dest(save_to)
        else:
            self.corpus_fname = LdaUtils.change_ext(fname, 'corpus')
        # if there is no corpus file or the user wants to rebuild, build .corpus
        if not os.path.isfile(self.corpus_fname) or click.confirm('There already is corpus. Do you want to rebuild?'):
            print 'start building corpus'
            start = time()
            corpora.MmCorpus.serialize(self.corpus_fname, self.__iter_doc2bow(LdaUtils.iter_csv(fname, -1).split()))  # save
            print 'building corpus takes: %s' % LdaUtils.human_readable_time(time() - start)
        self.corpus = corpora.MmCorpus(self.corpus_fname)
        return self.corpus
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号