def train_lsi_model_gensim(corpus, total_topics=2):
    """Train an LSI topic model on a TF-IDF weighted version of *corpus*.

    Args:
        corpus: Documents to model; handed unchanged to ``normalize_corpus``.
        total_topics: Number of topics requested from the LSI model
            (defaults to 2).

    Returns:
        The trained ``models.LsiModel`` instance.
    """
    # normalize_corpus is defined outside this block; with tokenize=True it
    # presumably yields one token list per document -- TODO confirm.
    norm_tokenized_corpus = normalize_corpus(corpus, tokenize=True)

    # Build the token <-> id mapping, then encode each document with it.
    dictionary = corpora.Dictionary(norm_tokenized_corpus)
    mapped_corpus = [
        dictionary.doc2bow(text) for text in norm_tokenized_corpus
    ]

    # Re-weight the encoded documents with TF-IDF before fitting LSI.
    tfidf = models.TfidfModel(mapped_corpus)
    corpus_tfidf = tfidf[mapped_corpus]

    # id2word lets the model map term ids back to the dictionary's tokens.
    lsi = models.LsiModel(
        corpus_tfidf,
        id2word=dictionary,
        num_topics=total_topics,
    )
    return lsi
topic_modeling.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录