def get_valid_bigram_words(self, words):
    """Return the bigrams of ``words`` that pass the configured filters.

    Adjacent pairs are produced by ``nltk.bigrams(words)``. A pair is kept
    only when all of the following hold:

    * both tokens have length >= ``self.min_len``;
    * if ``self.exclude_stopwords`` is truthy, neither token is in
      ``config.STOP_WORDS``;
    * if ``self.skip_digit`` is truthy, neither token contains a digit.

    Args:
        words: Iterable of string tokens, in order.

    Returns:
        A list of strings, each being the two tokens of an accepted bigram
        joined by a single space, in the order the bigrams were produced.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (a warning today, slated to become an error). Compiled once per call
    # instead of twice per bigram.
    digit_pattern = re.compile(r"\d")

    bigram_words = []
    for first, second in nltk.bigrams(words):
        # Length filter applies unconditionally.
        if len(first) < self.min_len or len(second) < self.min_len:
            continue
        # config.STOP_WORDS is only consulted when stopword filtering is on.
        if self.exclude_stopwords and (
            first in config.STOP_WORDS or second in config.STOP_WORDS
        ):
            continue
        # A single digit anywhere in either token rejects the pair.
        if self.skip_digit and (
            digit_pattern.search(first) or digit_pattern.search(second)
        ):
            continue
        bigram_words.append(" ".join((first, second)))
    return bigram_words
spelling_checker.py 文件源码
python
阅读 38
收藏 0
点赞 0
评论 0
评论列表
文章目录