def train_lda_model_gensim(corpus, total_topics=2, iterations=1000):
    """Train a gensim LDA topic model on a TF-IDF weighted corpus.

    The documents are normalized and tokenized with ``normalize_corpus``,
    mapped to bag-of-words vectors through a gensim ``Dictionary``,
    re-weighted with a ``TfidfModel`` and then handed to ``LdaModel``.

    Args:
        corpus: The documents to model. Passed unchanged to
            ``normalize_corpus(corpus, tokenize=True)``.
        total_topics: Number of topics to fit; forwarded to ``LdaModel``
            as ``num_topics``.
        iterations: Forwarded to ``LdaModel`` as ``iterations``. Defaults
            to 1000, the value that used to be hard-coded here, so
            existing callers are unaffected.

    Returns:
        The trained ``models.LdaModel`` instance.
    """
    # NOTE(review): assumes normalize_corpus(..., tokenize=True) yields one
    # list of string tokens per document, which is what Dictionary and
    # doc2bow consume -- confirm against its definition.
    norm_tokenized_corpus = normalize_corpus(corpus, tokenize=True)

    # Token <-> integer id mapping; also reused as id2word so the trained
    # model can report topics as words rather than ids.
    dictionary = corpora.Dictionary(norm_tokenized_corpus)
    mapped_corpus = [dictionary.doc2bow(text)
                     for text in norm_tokenized_corpus]

    # Re-weight the raw term counts with TF-IDF before fitting LDA.
    tfidf = models.TfidfModel(mapped_corpus)
    corpus_tfidf = tfidf[mapped_corpus]

    lda = models.LdaModel(corpus_tfidf,
                          id2word=dictionary,
                          iterations=iterations,
                          num_topics=total_topics)
    return lda
topic_modeling.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录