def build_model(self, fname=None, save_to=None):
    """Train (or reuse) an LDA model file and load it into ``self.model``.

    The dictionary and corpus come from ``self.id2word`` / ``self.corpus``
    when those are truthy; otherwise ``self.build_id2word()`` /
    ``self.build_corpus()`` are called to produce them.

    :param fname: model file name. When falsy, the user is prompted for one
        (default ``model.lda``). The name is passed through ``self.__dest``
        before being used as a path.
    :param save_to: accepted but not used by this method.
    :return: the ``LdaMulticore`` model loaded from the resolved file, which
        is also stored on ``self.model``.
    """
    dictionary = self.id2word or self.build_id2word()
    documents = self.corpus or self.build_corpus()

    # Resolve the model file, asking for a name when none was supplied.
    if not fname:
        fname = click.prompt('model file name', type=str, default='model.lda')
    model_path = self.__dest(fname)

    # Train when the file is missing; otherwise only if the user confirms.
    if os.path.isfile(model_path):
        retrain = click.confirm(
            'There already is %s. Do you want to re run lda?' % model_path)
    else:
        retrain = True

    if retrain:
        worker_count = click.prompt('Number of processes to launch',
                                    type=int,
                                    default=multiprocessing.cpu_count())
        pass_count = click.prompt('Number of epochs to run', type=int, default=20)
        topic_count = click.prompt('Number of topics', type=int, default=100)

        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement this file uses.
        print('start building model')
        started_at = time()
        trained = LdaMulticore(documents,
                               id2word=dictionary,
                               num_topics=topic_count,
                               workers=worker_count,
                               passes=pass_count)
        trained.save(model_path)
        elapsed = LdaUtils.human_readable_time(time() - started_at)
        print('building model takes: %s' % elapsed)

    # The model is always read back from disk, freshly trained or not.
    self.model = LdaMulticore.load(model_path)
    return self.model
评论列表
文章目录