def unescape(text: str) -> str:
    """Replace HTML character references and named entities in *text*.

    Handles decimal references (``&#65;``), hexadecimal references
    (``&#x41;`` or ``&#X41;``) and named entities known to
    ``html.entities.name2codepoint`` (``&amp;``, ``&lt;`` ...).
    Anything that cannot be decoded -- an unknown entity name, a malformed
    number, or a code point outside the valid Unicode range -- is left in
    the output exactly as it appeared in the input.

    Args:
        text: The string possibly containing ``&...;`` sequences.

    Returns:
        A copy of *text* with every decodable reference replaced by the
        character it denotes.
    """

    def fixup(m: typing.Match) -> str:
        """Return the replacement for one matched ``&...;`` sequence."""
        in_text = m.group(0)
        if in_text[:2] == "&#":
            # Numeric character reference.
            try:
                if in_text[:3] in ("&#x", "&#X"):
                    return chr(int(in_text[3:-1], 16))
                return chr(int(in_text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: digits are not a valid number, or the code
                # point is out of range for chr().
                # OverflowError: the number is too large for chr() to even
                # range-check. Either way, keep the original text.
                pass
        else:
            # Named entity: strip the leading "&" and trailing ";".
            try:
                return chr(html.entities.name2codepoint[in_text[1:-1]])
            except KeyError:
                pass
        return in_text  # leave as is

    # Raw string so that "\w" reaches the regex engine unchanged instead of
    # being treated as an (invalid) string escape sequence.
    return re.sub(r"&#?\w+;", fixup, text)
评论列表
文章目录