def __init__(self, num_topics=100, min_word_count=20,
             top_most_common_words=10, min_doc_length=40,
             max_doc_length=1000, random_state=None):
    """Store the configuration and create the text-processing helpers.

    Each argument is saved unchanged on the instance under the same
    name. How the values are used is decided by other methods that are
    not part of this block.

    Args:
        num_topics: Saved as ``self.num_topics``.
        min_word_count: Saved as ``self.min_word_count``.
        top_most_common_words: Saved as ``self.top_most_common_words``.
        min_doc_length: Saved as ``self.min_doc_length``; must be
            strictly smaller than ``max_doc_length``.
        max_doc_length: Saved as ``self.max_doc_length``; must be
            strictly greater than ``min_doc_length``.
        random_state: Saved as ``self.random_state``.

    Raises:
        AssertionError: If ``max_doc_length`` is not strictly greater
            than ``min_doc_length``.
    """
    # Validate before touching the instance. This is an explicit raise
    # rather than an ``assert`` statement because asserts are removed when
    # Python runs with -O, which would let an invalid range through.
    # AssertionError is kept so callers that already catch it still work.
    if not max_doc_length > min_doc_length:
        raise AssertionError(
            "max_doc_length must be greater than min_doc_length")

    self.num_topics = num_topics
    self.min_word_count = min_word_count
    self.top_most_common_words = top_most_common_words
    self.min_doc_length = min_doc_length
    self.max_doc_length = max_doc_length
    self.random_state = random_state

    # natural language processing
    self.stop_words = self.getEnglishStopWords()
    # NOTE(review): Phrases is imported elsewhere in the file (looks like
    # a phrase/bigram model); it is created here with no arguments.
    self.bigramizer = Phrases()
评论列表
文章目录