nlp.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:weibo_scrawler_app 作者: coolspiderghy 项目源码 文件源码
def train_by_lsi(lib_texts, num_topics=10):
    """Train an LSI model on a document collection and build a similarity index.

    The documents are turned into a gensim dictionary and bag-of-words
    corpus, weighted with TF-IDF, and the LSI model is fitted on the
    TF-IDF-weighted corpus.

    Args:
        lib_texts: Collection of documents, each an iterable of tokens.
            It is traversed twice (once to build the dictionary, once to
            build the corpus), so it must be re-iterable -- a one-shot
            generator will yield an empty corpus.
        num_topics: Number of latent topics requested from the LSI model.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A tuple ``(index, dictionary, lsi)`` where ``index`` is a
        ``gensim.similarities.docsim.MatrixSimilarity`` over the library
        documents, ``dictionary`` is the ``corpora.Dictionary`` built from
        ``lib_texts`` and ``lsi`` is the trained ``models.LsiModel``.
    """
    from gensim import corpora, models, similarities

    # To watch training progress, configure the ``logging`` module at INFO
    # level before calling this function.

    dictionary = corpora.Dictionary(lib_texts)
    # doc2bow() turns a collection of words into sparse
    # (word_id, word_frequency) pairs.
    corpus = [dictionary.doc2bow(text) for text in lib_texts]
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]

    lsi = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=num_topics)

    # NOTE(review): the model is fitted on the TF-IDF corpus, but the index is
    # built from the raw bag-of-words corpus. The TF-IDF model is not returned,
    # so callers presumably project raw bag-of-words queries as well; confirm
    # against the callers before switching this to lsi[corpus_tfidf].
    index = similarities.MatrixSimilarity(lsi[corpus])

    return (index, dictionary, lsi)


#????? -- ??????????????????????
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号