def getLsiModel(lsipath='./lsi/', num_topics=300):
    """Train an LSI model on the TF-IDF-weighted ``viva`` corpus and save it.

    Loads the dictionary from ``lsipath + 'viva.dict'`` and the Matrix Market
    corpus from ``lsipath + 'viva.mm'``, fits a TF-IDF model on that corpus,
    trains an LSI model on the TF-IDF view and saves the result to
    ``lsipath + 'viva.lsi'``.

    Args:
        lsipath: Prefix concatenated directly with the file names, so it
            must already end with a path separator (as the default does).
        num_topics: Number of topics passed to the LSI model.

    Returns:
        The LSI model. ``None`` if constructing the model raised. If training
        succeeded but saving failed, the trained (unsaved) model is still
        returned and the failure is only logged.

    Raises:
        Errors from loading the dictionary/corpus or from building the
        TF-IDF model are not caught here and propagate to the caller.
    """
    # Load the token <-> id mapping.
    dictionary = corpora.Dictionary.load(lsipath + 'viva.dict')
    print('??????')

    # Load the serialized bag-of-words corpus.
    corpus = corpora.MmCorpus(lsipath + 'viva.mm')
    print('mm load')

    # Fit TF-IDF on the corpus and wrap the corpus in the TF-IDF transform.
    tfidf_started = time.time()
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    tfidf_elapsed = time.time() - tfidf_started
    # A single-argument print call produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % ("tfidf_corpus time = ", tfidf_elapsed))

    lsi = None
    try:
        # One-pass training, 2 power iterations, chunks of 60000 documents.
        lsi = lsimodel.LsiModel(
            corpus_tfidf,
            id2word=dictionary,
            num_topics=num_topics,
            chunksize=60000,
            power_iters=2,
            onepass=True,
        )
        lsi.save(lsipath + 'viva.lsi')
        print('lsi??????')
    except (SystemExit, KeyboardInterrupt):
        # Never swallow interpreter shutdown or Ctrl-C.
        raise
    except Exception:
        # Best effort: record the traceback and hand back whatever we have.
        logging.error('Failed to lsi train', exc_info=True)
    return lsi
# Comment list
# Table of contents