def train_by_lsi(lib_texts, num_topics=10):
    """Train an LSI model on tokenized documents and build a similarity index.

    Args:
        lib_texts: Collection of tokenized documents (each document is a
            sequence of word tokens). It is traversed twice (once to build
            the dictionary, once to build the corpus), so it must be
            re-iterable, e.g. a list rather than a one-shot generator.
        num_topics: Number of latent topics requested from the LSI model.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A tuple ``(index, dictionary, lsi)`` where ``index`` is a
        ``gensim.similarities.MatrixSimilarity`` over the training documents,
        ``dictionary`` is the ``gensim.corpora.Dictionary`` built from
        ``lib_texts``, and ``lsi`` is the trained ``gensim.models.LsiModel``.
    """
    # Imported locally so that merely importing this module does not require
    # gensim to be installed.
    from gensim import corpora, models, similarities

    # Optional: uncomment to enable logging output while training.
    # import logging
    # logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    # Map every distinct token to an integer id.
    dictionary = corpora.Dictionary(lib_texts)

    # doc2bow() turns a collection of words into a sparse list of
    # (word_id, word_frequency) pairs.
    corpus = [dictionary.doc2bow(text) for text in lib_texts]

    # Re-weight the raw counts with TF-IDF before fitting the topic model.
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]

    # Fit the LSI model on the TF-IDF weighted corpus.
    lsi = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=num_topics)

    # NOTE(review): the model is trained on TF-IDF vectors, but the index is
    # built from the raw bag-of-words corpus. The TF-IDF model is not
    # returned, so callers presumably query with lsi[dictionary.doc2bow(...)],
    # which matches this index. Switching to lsi[corpus_tfidf] would also
    # require returning `tfidf` so queries can be weighted the same way.
    index = similarities.MatrixSimilarity(lsi[corpus])

    return (index, dictionary, lsi)
#????? -- ??????????????????????
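# 评论列表  (web page residue: "comment list" -- not part of the code)
# 文章目录  (web page residue: "article table of contents" -- not part of the code)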