def encode(self, input, errors='strict'):
assert errors == 'strict'
#return codecs.encode(input, self.base_encoding, self.name), len(input)
# The above line could totally be all we needed, relying on the error
# handling to replace the unencodable Unicode characters with our extended
# byte sequences.
#
# However, there seems to be a design bug in Python (probably intentional):
# the error handler for encoding is supposed to return a **Unicode** character,
# that then needs to be encodable itself... Ugh.
#
# So we implement what codecs.encode() should have been doing: which is expect
# error handler to return bytes() to be added to the output.
#
# This seems to have been fixed in Python 3.3. We should try using that and
# use fallback only if that failed.
# https://docs.python.org/3.3/library/codecs.html#codecs.register_error
length = len(input)
out = b''
while input:
try:
part = codecs.encode(input, self.base_encoding)
out += part
input = '' # All converted
except UnicodeEncodeError as e:
# Convert the correct part
out += codecs.encode(input[:e.start], self.base_encoding)
replacement, pos = self.error(e)
out += replacement
input = input[pos:]
return out, length
评论列表
文章目录