def build_id2word(self, fname=None, save_to=None):
    """Build (or reuse) the id2word dictionary for a words file and load it.

    The words file is taken from ``fname``; when that is falsy,
    ``self.words_fname`` is used, and when that is falsy too the user is
    prompted for it. The dictionary is (re)built when no dictionary file
    exists yet, or when one exists and the user confirms a rebuild.

    Args:
        fname: Name of the words file. It is passed through
            ``self.__dest`` before use.
        save_to: Optional name for the dictionary file, passed through
            ``self.__dest``. When falsy, the name is derived from the
            words file via ``LdaUtils.change_ext(fname, 'id2word')``.

    Returns:
        The ``corpora.Dictionary`` loaded from ``self.id2word_fname``;
        the same object is stored in ``self.id2word``.

    Raises:
        AssertionError: If the resolved words file does not exist.

    Side effects:
        Sets ``self.id2word_fname`` and ``self.id2word``, may write the
        dictionary file, may prompt on the terminal, and prints progress
        and timing information when building.
    """
    # Resolve the words file: explicit argument, configured name, then prompt.
    if not fname:
        fname = self.words_fname or click.prompt('words file')
    # NOTE(review): applied to every fname (as is done for save_to below);
    # confirm this matches the intended behavior of __dest.
    fname = self.__dest(fname)

    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is kept for callers.
    if not os.path.isfile(fname):
        raise AssertionError('No such file: %s' % fname)

    if save_to:
        self.id2word_fname = self.__dest(save_to)
    else:
        self.id2word_fname = LdaUtils.change_ext(fname, 'id2word')

    # Build when there is no dictionary file yet; if one exists, the
    # short-circuit `or` asks the user only in that case.
    if not os.path.isfile(self.id2word_fname) or click.confirm('There alread is id2word. Do you want to rebuild?'):
        print('start building id2word')
        start = time()
        # NOTE(review): .split() is called on whatever LdaUtils.iter_csv
        # returns -- confirm that helper returns an object supporting it.
        words = LdaUtils.iter_csv(fname, -1).split()
        id2word = corpora.Dictionary(LdaUtils.filter_words(words))
        id2word.save(self.id2word_fname)
        print('building id2word takes: %s' % LdaUtils.human_readable_time(time() - start))

    # Always load from disk so a freshly built and a reused dictionary
    # go through the same path.
    self.id2word = corpora.Dictionary.load(self.id2word_fname)
    return self.id2word
评论列表
文章目录