def _sanitize(self, text):
    """Clean a piece of text for downstream processing.

    The steps run in this order:

    1. collapse every run of whitespace into a single space,
    2. drop digit characters,
    3. strip accents by passing the text through ``unidecode``,
    4. turn hyphens and apostrophes into spaces, then delete every other
       character listed in ``string.punctuation``,
    5. lowercase the result.

    Whitespace is only collapsed in step 1, so a removed digit or a
    replaced hyphen/apostrophe can still leave consecutive (or leading /
    trailing) spaces in the returned value.

    :param text: the string to clean.
    :return: the cleaned string.
    """
    # Collapse whitespace runs; split() with no argument also trims the ends.
    text = ' '.join(text.split())
    # Remove digits.
    text = ''.join(c for c in text if not c.isdigit())
    # Remove accents.
    text = unidecode(text)
    # Hyphens and apostrophes become spaces so the parts they join stay
    # separate words; this must happen before the punctuation filter below,
    # which would otherwise delete them outright.
    # str.replace / join are used instead of string.maketrans and
    # translate(None, chars): those are Python 2-only and fail on Python 3.
    text = text.replace('-', ' ').replace("'", ' ')
    punctuation = frozenset(string.punctuation)
    text = ''.join(c for c in text if c not in punctuation)
    # Lowercase.
    return text.lower()
评论列表
文章目录