def __init__(self, num_topics=6, num_iterations=500, random_state=None, clean_text=True, vectorizer=None):
    """
    Configure the LDA estimator.

    Stores the hyper-parameters, builds the underlying ``lda.LDA`` model from
    them, selects the word vectorizer and asks nltk to download the data sets
    this class relies on.

    :param num_topics: Number of topics to model (generally 3-10)
    :type num_topics: int
    :param num_iterations: Number of iterations to allow before locking in topics
    :type num_iterations: int
    :param random_state: Random seed, for consistent topics
    :type random_state: int
    :param clean_text: Whether to clean text using self.preprocess(). Recommended
        if you have not preprocessed the text already
    :type clean_text: bool
    :param vectorizer: Word vectorizer to use. It should convert a collection of
        text documents to a matrix of token counts. When None, a new
        ``CountVectorizer()`` is created.
    :type vectorizer: object with a CountVectorizer-like interface, or None
    """
    # Keep the raw hyper-parameters on the instance.
    self.num_topics = num_topics
    self.num_iterations = num_iterations
    self.random_state = random_state

    # The model is built eagerly from the values given above.
    self.lda_model = lda.LDA(
        n_topics=num_topics,
        n_iter=num_iterations,
        random_state=random_state,
    )

    self.clean_text = clean_text
    # Initialised to None here; presumably populated later (not visible in this block).
    self.get_topic_description_df = None

    # Use the caller's vectorizer if one was given, otherwise a default CountVectorizer.
    self.vectorizer = vectorizer if vectorizer is not None else CountVectorizer()

    # Make sure nltk has required data sets (requested on every instantiation).
    for dataset in ('punkt', 'stopwords', 'wordnet'):
        nltk.download(dataset)
easy_lda.py 文件源码
python
阅读 50
收藏 0
点赞 0
评论 0
评论列表
文章目录