def test_errors(self):
tests = [
(b'\xff', u'\ufffd'),
(b'A\x00Z', u'A\ufffd'),
(b'A\x00B\x00C\x00D\x00Z', u'ABCD\ufffd'),
(b'\x00\xd8', u'\ufffd'),
(b'\x00\xd8A', u'\ufffd'),
(b'\x00\xd8A\x00', u'\ufffdA'),
(b'\x00\xdcA\x00', u'\ufffdA'),
]
for raw, expected in tests:
try:
with self.assertRaises(UnicodeDecodeError):
codecs.utf_16_le_decode(raw, 'strict', True)
self.assertEqual(raw.decode('utf-16le', 'replace'), expected)
except:
print 'raw=%r' % raw
raise
python类utf_16_le_decode()的实例源码
def test_errors(self):
tests = [
(b'\xff', u'\ufffd'),
(b'A\x00Z', u'A\ufffd'),
(b'A\x00B\x00C\x00D\x00Z', u'ABCD\ufffd'),
(b'\x00\xd8', u'\ufffd'),
(b'\x00\xd8A', u'\ufffd'),
(b'\x00\xd8A\x00', u'\ufffdA'),
(b'\x00\xdcA\x00', u'\ufffdA'),
]
for raw, expected in tests:
try:
with self.assertRaises(UnicodeDecodeError):
codecs.utf_16_le_decode(raw, 'strict', True)
self.assertEqual(raw.decode('utf-16le', 'replace'), expected)
except:
print 'raw=%r' % raw
raise
def determine_encoding(self):
# type: () -> None
while not self.eof and (self.raw_buffer is None or
len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, binary_type):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode # type: ignore
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode # type: ignore
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode # type: ignore
self.encoding = 'utf-8'
self.update(1)
# 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56)
def test_decode_unicode(self):
# Most decoders don't accept unicode input
decoders = [
codecs.utf_7_decode,
codecs.utf_8_decode,
codecs.utf_16_le_decode,
codecs.utf_16_be_decode,
codecs.utf_16_ex_decode,
codecs.utf_32_decode,
codecs.utf_32_le_decode,
codecs.utf_32_be_decode,
codecs.utf_32_ex_decode,
codecs.latin_1_decode,
codecs.ascii_decode,
codecs.charmap_decode,
]
if hasattr(codecs, "mbcs_decode"):
decoders.append(codecs.mbcs_decode)
for decoder in decoders:
self.assertRaises(TypeError, decoder, "xxx")
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def setstate(self, state):
# state[1] will be ignored by BufferedIncrementalDecoder.setstate()
codecs.BufferedIncrementalDecoder.setstate(self, state)
state = state[1]
if state == 0:
self.decoder = (codecs.utf_16_be_decode
if sys.byteorder == "big"
else codecs.utf_16_le_decode)
elif state == 1:
self.decoder = (codecs.utf_16_le_decode
if sys.byteorder == "big"
else codecs.utf_16_be_decode)
else:
self.decoder = None
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (object, consumed)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError,"UTF-16 stream does not start with BOM"
return (object, consumed)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError,"UTF-16 stream does not start with BOM"
return (object, consumed)
### encodings module API
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError,"UTF-16 stream does not start with BOM"
return (object, consumed)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError,"UTF-16 stream does not start with BOM"
return (object, consumed)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def setstate(self, state):
# state[1] will be ignored by BufferedIncrementalDecoder.setstate()
codecs.BufferedIncrementalDecoder.setstate(self, state)
state = state[1]
if state == 0:
self.decoder = (codecs.utf_16_be_decode
if sys.byteorder == "big"
else codecs.utf_16_le_decode)
elif state == 1:
self.decoder = (codecs.utf_16_le_decode
if sys.byteorder == "big"
else codecs.utf_16_be_decode)
else:
self.decoder = None
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (object, consumed)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_16_le_decode(input, errors, True)