def clean_text(self, txt):
    """Clean a text according to the flags in the clean config ``self.cc``.

    The input is first converted with ``str()``. Each enabled step is then
    applied, in this order:

    * ``lower`` -- lowercase the text.
    * ``punctuation`` -- drop every character that is found in
      ``punctuations`` (a name defined outside this function).
    * ``whitespace`` -- remove all whitespace, including the whitespace
      between words.
    * ``digit`` -- drop the ASCII digits 0-9.

    Args:
        txt: Value to clean; it is passed through ``str()`` first.

    Returns:
        The cleaned string.
    """
    txt = str(txt)
    if self.cc['lower']:
        txt = txt.lower()
    if self.cc['punctuation']:
        txt = "".join(ch for ch in txt if ch not in punctuations)
    if self.cc['whitespace']:
        # NOTE(review): this deletes whitespace entirely, so neighbouring
        # words are glued together. If collapsing runs of whitespace to a
        # single space was intended, this should be " ".join -- confirm
        # with the callers before changing it.
        txt = "".join(txt.split())
    if self.cc['digit']:
        # Single pass that deletes the ten ASCII digits.
        txt = txt.translate(str.maketrans("", "", "0123456789"))
    return txt
# Comment list
# Article table of contents