def remove_emoticons(text):
    """Return *text* with every known emoticon removed and spacing normalised.

    The emoticons to strip come from the module-level ``emoticons``
    collection; matching is case-insensitive. After removal the text is run
    through ``tokenize`` and the resulting tokens are re-joined with single
    spaces, which collapses the gaps left behind by deleted emoticons.

    Args:
        text: The input string to clean.

    Returns:
        A single string of the remaining tokens separated by one space each.
    """
    # Regex alternation accepts the first branch that matches, so longer
    # emoticons must come before shorter ones that share a prefix
    # (e.g. ":-))" before ":-)"); otherwise a fragment is left in the text.
    longest_first = sorted(emoticons, key=len, reverse=True)
    pattern = re.compile(
        '|'.join(map(re.escape, longest_first)),
        flags=re.IGNORECASE,
    )
    stripped = pattern.sub('', text)

    # Re-tokenize to drop the unnecessary whitespace left by the removal.
    # NOTE(review): tokenize is assumed to return a sequence of token
    # sequences (one per sentence) -- confirm against its definition.
    sentences = tokenize(stripped)

    # Flatten in a single pass instead of repeated list concatenation.
    return " ".join(token for sentence in sentences for token in sentence)
评论列表
文章目录