def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
python类xhtml_unescape()的实例源码
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
escape_test.py 文件源码
项目:My-Web-Server-Framework-With-Python2.7
作者: syjsu
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
escape_test.py 文件源码
项目:My-Web-Server-Framework-With-Python2.7
作者: syjsu
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b("<foo>"), b("<foo>")),
("<>&\"", "<>&""),
("&", "&amp;"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b("<foo>"), b("<foo>")),
("<>&\"", "<>&""),
("&", "&amp;"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def _get_articel_info(self, article_info, nick_name, ori_create_time):
for k, v in article_info.items():
if isinstance(v, str):
article_info[k] = xhtml_unescape(v)
article_dict = {
'cdn_url': article_info['cover'].replace('\\', ''),
'title': article_info['title'],
'nick_name': nick_name,
'link': ('http://mp.weixin.qq.com' +
article_info['content_url'].replace('\\', '')),
'ori_create_time': ori_create_time,
'desc': article_info['digest'],
}
return article_dict
def ascii_to_unicode(cls, text):
def replace_ascii(match):
ascii = text[match.start():match.end()]
ascii = ascii.encode('ascii', 'ignore').strip() # convert escaped HTML entities back to original chars
if not ascii or ascii not in ascii_replace:
return ascii
return cls.convert(ascii_replace[ascii])
text = xhtml_unescape(text)
return re.sub(cls.ascii_compiled, replace_ascii, text)
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"", "<>&""),
("&", "&amp;"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u"<\u00e9>", u"<\u00e9>"),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u'foo\u0abcbar'),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u"<\u00e9>", u"<\u00e9>"),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u'foo\u0abcbar'),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u"<\u00e9>", u"<\u00e9>"),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u'foo\u0abcbar'),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u("<foo>"), u("<foo>")),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u("<\u00e9>"), u("<\u00e9>")),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u('foo\u0abcbar')),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u"<\u00e9>", u"<\u00e9>"),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
tests = [
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo bar', 'foo bar'),
('foo઼bar', u'foo\u0abcbar'),
('foo&#xyz;bar', 'foo&#xyz;bar'), # invalid encoding
('foo&#;bar', 'foo&#;bar'), # invalid encoding
('foo&#x;bar', 'foo&#x;bar'), # invalid encoding
]
for escaped, unescaped in tests:
self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_xhtml_escape(self):
tests = [
("<foo>", "<foo>"),
(u"<foo>", u"<foo>"),
(b"<foo>", b"<foo>"),
("<>&\"'", "<>&"'"),
("&", "&amp;"),
(u"<\u00e9>", u"<\u00e9>"),
(b"<\xc3\xa9>", b"<\xc3\xa9>"),
]
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))