def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
"""given string/unicode or bytes/string, determine encoding
from magic encoding comment, return body as unicode
or raw if decode_raw=False
"""
if isinstance(text, compat.text_type):
m = self._coding_re.match(text)
encoding = m and m.group(1) or known_encoding or 'ascii'
return encoding, text
if text.startswith(codecs.BOM_UTF8):
text = text[len(codecs.BOM_UTF8):]
parsed_encoding = 'utf-8'
m = self._coding_re.match(text.decode('utf-8', 'ignore'))
if m is not None and m.group(1) != 'utf-8':
raise exceptions.CompileException(
"Found utf-8 BOM in file, with conflicting "
"magic encoding comment of '%s'" % m.group(1),
text.decode('utf-8', 'ignore'),
0, 0, filename)
else:
m = self._coding_re.match(text.decode('utf-8', 'ignore'))
if m:
parsed_encoding = m.group(1)
else:
parsed_encoding = known_encoding or 'ascii'
if decode_raw:
try:
text = text.decode(parsed_encoding)
except UnicodeDecodeError:
raise exceptions.CompileException(
"Unicode decode operation of encoding '%s' failed" %
parsed_encoding,
text.decode('utf-8', 'ignore'),
0, 0, filename)
return parsed_encoding, text
评论列表
文章目录