# codecs.py source file - Python code snippet

def encode(self, input, errors='strict'):
    """Encode *input* with the base encoding, patching in extended bytes.

    The text is encoded with ``self.base_encoding``.  Whenever that raises
    ``UnicodeEncodeError``, the part before the failure is encoded on its
    own, ``self.error`` is asked for the replacement **bytes** together
    with the index at which encoding should resume, and the process is
    repeated on the remainder.

    Args:
        input: The text to encode.
        errors: Error-handling scheme; only ``'strict'`` is supported.

    Returns:
        A ``(encoded_bytes, length_consumed)`` tuple, where
        ``length_consumed`` is ``len(input)``.

    Raises:
        AssertionError: If *errors* is not ``'strict'``.
        Any exception raised by ``self.error`` propagates unchanged.
    """
    # NOTE: kept as an assert so callers see the same exception type as
    # before; be aware that asserts are stripped when running with -O.
    assert errors == 'strict'

    # Ideally this whole method would just be
    #     codecs.encode(input, self.base_encoding, self.name), len(input)
    # relying on the error handling to replace the unencodable Unicode
    # characters with our extended byte sequences.
    #
    # However, there seems to be a design bug in Python (probably
    # intentional): the error handler for encoding is supposed to return a
    # **Unicode** character, that then needs to be encodable itself...
    #
    # So we implement what codecs.encode() should have been doing, which is
    # to expect the error handler to return bytes() to be added to the
    # output.
    #
    # TODO: This seems to have been fixed in Python 3.3.  We should try
    # using that and use this fallback only if that failed.
    # https://docs.python.org/3.3/library/codecs.html#codecs.register_error

    # Collect the pieces and join once at the end: repeated ``bytes +=``
    # copies the whole accumulated output on every step.
    chunks = []
    remaining = input
    while remaining:
        try:
            encoded = codecs.encode(remaining, self.base_encoding)
        except UnicodeEncodeError as e:
            # Encode the part that precedes the unencodable character(s).
            chunks.append(
                codecs.encode(remaining[:e.start], self.base_encoding))
            # The handler supplies the raw bytes and the index (into the
            # string that failed) at which to continue.
            replacement, pos = self.error(e)
            chunks.append(replacement)
            remaining = remaining[pos:]
        else:
            # Everything that was left encoded cleanly; we are done.
            chunks.append(encoded)
            break
    return b''.join(chunks), len(input)