def remove_stopwords(self, tokens):
    """Remove stopwords from token list.

    A token is dropped when it is contained in ``self._raw_stopwords`` or
    when it fully matches any pattern in ``self._regexp_stopwords``. The
    relative order of the remaining tokens is preserved.

    :param tokens: tokens from which stopwords should be removed
    :type tokens: list
    :return: tokens with filtered out stopwords
    :rtype: list
    """
    ret = []
    for token in tokens:
        if token in self._raw_stopwords:
            _logger.debug("Dropping raw stopword '%s'", token)
            continue

        for regexp in self._regexp_stopwords:
            if re.fullmatch(regexp, token):
                _logger.debug("Dropping stopword '%s' based on regexp '%s'", token,
                              regexp.pattern)
                # A `continue` here would only advance the inner loop and the
                # token would still be appended below; `break` skips the
                # `else` clause so the token is really dropped.
                break
        else:
            # Reached only when no regexp matched (or there are no regexps).
            ret.append(token)
    return ret
tokenizer.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录