def __init__(self):
    """Set up the text-processing helpers held by this instance.

    Attributes created:
        stemmer: a ``LancasterStemmer`` instance.
        vectorizer: a ``CountVectorizer`` (turns a collection of text
            documents into a matrix of token counts) configured with
            ``strip_accents='ascii'`` so accents are removed during
            preprocessing.
        tokenizer: the callable returned by ``vectorizer.build_tokenizer()``.
        preprocessor: the callable returned by
            ``vectorizer.build_preprocessor()``.
        spellchecker: an enchant ``DictWithPWL`` for "en_US", extended with
            the personal word list at
            ``path_config.PERSONAL_WORD_DICTIONARY_FILE``.
    """
    self.stemmer = LancasterStemmer()

    # Build the vectorizer once, then derive the tokenizer and preprocessor
    # from that same instance so all three share one configuration.
    count_vectorizer = CountVectorizer(strip_accents='ascii')
    self.vectorizer = count_vectorizer
    self.tokenizer = count_vectorizer.build_tokenizer()
    self.preprocessor = count_vectorizer.build_preprocessor()

    # Spell checking uses the en_US dictionary plus the personal word list.
    personal_word_list = path_config.PERSONAL_WORD_DICTIONARY_FILE
    self.spellchecker = enchant.DictWithPWL("en_US", pwl=personal_word_list)
评论列表
文章目录