def process(input_text):
    """Turn raw text into a list of stemmed, stop-word-free tokens.

    The text is lowercased, split into runs of word characters (``\\w+``),
    filtered against the English stop-word list, and each remaining token
    is stemmed with the English Snowball stemmer.

    ``RegexpTokenizer``, ``SnowballStemmer`` and ``stopwords`` are expected
    to be in scope at module level (they look like the NLTK objects of the
    same names -- confirm against the file's imports).

    Args:
        input_text: The string to process.

    Returns:
        A list of stemmed tokens, in their original order. Duplicates are
        kept.
    """
    # Split on runs of word characters, which drops punctuation/whitespace.
    tokenizer = RegexpTokenizer(r'\w+')
    stemmer = SnowballStemmer('english')
    # A set makes each membership test O(1) instead of a scan of the whole
    # stop-word list for every token.
    stop_words = set(stopwords.words('english'))

    # Lowercase before tokenizing so tokens are compared to the stop-word
    # list case-insensitively.
    tokens = tokenizer.tokenize(input_text.lower())

    # Filter first, then stem: stop words are matched on the unstemmed form.
    return [stemmer.stem(token) for token in tokens if token not in stop_words]
topic_modeler.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录