def replace_punctuation(text, sub):
    """Return *text* with every Unicode punctuation character replaced by *sub*.

    A character counts as punctuation when its Unicode general category is
    one of the "P*" categories (Pc, Pd, Ps, Pe, Pi, Pf, Po).  Symbols such as
    ``$`` or ``+`` (categories "S*") are left untouched.

    Args:
        text: The Unicode string to scan, one character at a time.
        sub: The replacement string inserted in place of each punctuation
            character; may be empty to strip punctuation entirely.

    Returns:
        A new string of the non-punctuation characters of *text* with *sub*
        substituted for each punctuation character.
    """
    # Connector, dash, open, close, initial-quote, final-quote, other.
    punctuation_cats = {'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po'}
    return u"".join(
        sub if unicodedata.category(ch) in punctuation_cats else ch
        for ch in text
    )
# from http://stackoverflow.com/a/22238613/596939
# (scraped page navigation residue: "comment list", "article table of contents")