def test_unicode_newlines(self):
# Ensure that only \r\n is recognized as a header separator, and not
# the other newline-like unicode characters.
# Characters that are likely to be problematic can be found in
# http://unicode.org/standard/reports/tr13/tr13-5.html
# and cpython's unicodeobject.c (which defines the implementation
# of unicode_type.splitlines(), and uses a different list than TR13).
newlines = [
u('\u001b'), # VERTICAL TAB
u('\u001c'), # FILE SEPARATOR
u('\u001d'), # GROUP SEPARATOR
u('\u001e'), # RECORD SEPARATOR
u('\u0085'), # NEXT LINE
u('\u2028'), # LINE SEPARATOR
u('\u2029'), # PARAGRAPH SEPARATOR
]
for newline in newlines:
# Try the utf8 and latin1 representations of each newline
for encoding in ['utf8', 'latin1']:
try:
try:
encoded = newline.encode(encoding)
except UnicodeEncodeError:
# Some chars cannot be represented in latin1
continue
data = b'Cookie: foo=' + encoded + b'bar'
# parse() wants a native_str, so decode through latin1
# in the same way the real parser does.
headers = HTTPHeaders.parse(
native_str(data.decode('latin1')))
expected = [('Cookie', 'foo=' +
native_str(encoded.decode('latin1')) + 'bar')]
self.assertEqual(
expected, list(headers.get_all()))
except Exception:
gen_log.warning("failed while trying %r in %s",
newline, encoding)
raise
评论列表
文章目录