lsi_stream_train.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:recommended_system 作者: wac81 项目源码 文件源码
def getLsiModel(lsipath='./lsi/', num_topics=300):
    """Train an LSI model on the TF-IDF weighted corpus stored under *lsipath*.

    Reads the dictionary from ``viva.dict`` and the Matrix Market corpus from
    ``viva.mm`` inside *lsipath*, fits a TF-IDF model on that corpus, trains
    an LSI model on the TF-IDF transformed corpus and saves it as
    ``viva.lsi`` in the same directory.

    Args:
        lsipath: Directory holding ``viva.dict`` / ``viva.mm``; the trained
            model is written there as well. A trailing path separator is
            optional.
        num_topics: Number of topics passed to the LSI model.

    Returns:
        The trained LSI model, or ``None`` when training or saving failed
        (the failure is logged with its traceback).

    Raises:
        Whatever loading the dictionary/corpus or building the TF-IDF model
        raises; only the LSI train/save step is guarded.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # os.path.join tolerates an lsipath given without a trailing separator,
    # where plain string concatenation would build e.g. 'lsiviva.dict'.
    dictionary = corpora.Dictionary.load(os.path.join(lsipath, 'viva.dict'))
    print('??????')
    corpus = corpora.MmCorpus(os.path.join(lsipath, 'viva.mm'))
    print('mm load')

    t31 = time.time()

    # Weight the raw corpus with TF-IDF before feeding it to LSI.
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    t32 = time.time()
    # Single-argument print with %-formatting emits the same text on
    # Python 2 and Python 3 (two spaces after '=' match the old output).
    print("tfidf_corpus time =  %s" % (t32 - t31))

    lsi = None
    try:
        lsi = lsimodel.LsiModel(
            corpus_tfidf,
            id2word=dictionary,
            num_topics=num_topics,
            chunksize=60000,
            power_iters=2,
            onepass=True,
        )
        lsi.save(os.path.join(lsipath, 'viva.lsi'))
        print('lsi??????')
    except (SystemExit, KeyboardInterrupt):
        # Never swallow interpreter-exit or Ctrl-C requests.
        raise
    except Exception:
        # Best effort: log the traceback and fall through. Note that if only
        # the save step failed, the trained in-memory model is still returned.
        logging.error('Failed to lsi train', exc_info=True)

    return lsi
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号