def fit(self, documents):
    """
    Pre-process ``documents``, build the dictionary and bag-of-words
    corpus from them, train the LDA model, and record its dominant topics.

    parameters:
        documents: list of strings, each represents a document

    Sets these attributes on ``self``, in this order: ``tokens``,
    ``dictionary``, ``corpus``, ``lda``, ``num_dominant_topics`` and
    ``dominant_topic_ids``. Nothing is returned.
    """
    # Pre-processing comes first: the dictionary and the corpus are both
    # derived from its output.
    tokenized = self.preProcessCorpus(documents)
    self.tokens = tokenized

    vocabulary = corpora.Dictionary(tokenized)
    self.dictionary = vocabulary

    # One doc2bow result per pre-processed document.
    bow_corpus = [vocabulary.doc2bow(doc_tokens) for doc_tokens in tokenized]
    self.corpus = bow_corpus

    model = self.getLDA(
        dictionary=vocabulary,
        corpus=bow_corpus,
        num_topics=self.num_topics,
        random_state=self.random_state,
    )
    self.lda = model

    # Never ask for more than 10 dominant topics, nor for more topics
    # than the model was configured with.
    top_n = min(10, self.num_topics)
    self.num_dominant_topics = top_n

    self.dominant_topic_ids = self.getDominantTopics(bow_corpus, model, top_n)
# Web-page residue, not part of the code: "评论列表" (comment list), "文章目录" (article table of contents).