lda.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:Sentences-analysis 作者: sungminoh 项目源码 文件源码
def build_id2word(self, fname=None, save_to=None):
        """Build (or reuse) the id2word dictionary for a words file, then load it.

        The dictionary file is rebuilt when it does not exist yet, or when it
        exists and the user confirms the rebuild at the interactive prompt.
        In both cases the dictionary is finally loaded from
        ``self.id2word_fname``, stored on ``self.id2word`` and returned.

        Args:
            fname: Name of the words file. When falsy, ``self.words_fname`` is
                used, and if that is falsy too the user is prompted for it.
                The name is passed through ``self.__dest`` before use
                (presumably resolving it against an output directory --
                confirm against that helper).
            save_to: Optional name for the id2word file, also passed through
                ``self.__dest``. When falsy, the name is derived from the
                words file via ``LdaUtils.change_ext(fname, 'id2word')``.

        Returns:
            The dictionary loaded by ``corpora.Dictionary.load``.

        Raises:
            AssertionError: If the resolved words file does not exist.
        """
        # read words.csv file
        if not fname:
            fname = self.words_fname or click.prompt('words file')
        fname = self.__dest(fname)
        if not os.path.isfile(fname):
            # Raised explicitly rather than via `assert` so the check is not
            # stripped under `python -O`; the exception type is kept as
            # AssertionError so existing callers observe the same failure.
            raise AssertionError('No such file: %s' % fname)
        if save_to:
            self.id2word_fname = self.__dest(save_to)
        else:
            self.id2word_fname = LdaUtils.change_ext(fname, 'id2word')
        # if there is no id2word file or the user wants to rebuild, build .id2word
        # (the confirm prompt is only shown when the file already exists,
        # because `or` short-circuits on the missing-file check)
        if not os.path.isfile(self.id2word_fname) or click.confirm('There already is id2word. Do you want to rebuild?'):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('start building id2word')
            start = time()
            id2word = corpora.Dictionary(LdaUtils.filter_words(LdaUtils.iter_csv(fname, -1).split()))
            id2word.save(self.id2word_fname)  # save
            print('building id2word takes: %s' % LdaUtils.human_readable_time(time() - start))
        # Always load from disk so the returned object matches the saved file.
        self.id2word = corpora.Dictionary.load(self.id2word_fname)
        return self.id2word
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号