def ascii_to_unicode(cls, text):
    """Replace ASCII shortcuts found in *text* with their converted form.

    *text* is first passed through ``xhtml_unescape`` and every match of
    ``cls.ascii_compiled`` in the result is then handed to a replacement
    callback.

    Args:
        text: The input string to process.

    Returns:
        The unescaped text in which each match that is a key of
        ``ascii_replace`` has been replaced by
        ``cls.convert(ascii_replace[key])``. Matches that are not keys are
        replaced by their ASCII-only, whitespace-stripped form.
    """
    def replace_ascii(match):
        # The substitution below runs on the same (unescaped) text, so the
        # whole-match group is exactly the slice text[start:end].
        matched = match.group(0)
        # Drop non-ASCII characters and strip surrounding whitespace at the
        # bytes level, then decode back to text.  Without the decode the key
        # is ``bytes`` on Python 3, which cannot be returned to re.sub for a
        # str subject (TypeError) and -- presumably, since ascii_replace is
        # defined elsewhere -- would never equal its text keys.
        key = matched.encode('ascii', 'ignore').strip().decode('ascii')
        if not key or key not in ascii_replace:
            return key
        return cls.convert(ascii_replace[key])

    # Convert escaped HTML entities back to original chars before matching.
    text = xhtml_unescape(text)
    return re.sub(cls.ascii_compiled, replace_ascii, text)
评论列表
文章目录