def _detect_encoding(self, fileid):
    """Guess the text encoding of an XML file from its first line.

    The first line is read as raw bytes and inspected in this order:

    1. A leading Unicode byte-order mark (BOM).
    2. An ``encoding="..."`` or ``encoding='...'`` pseudo-attribute in
       an ``<?xml ...?>`` declaration at the start of the line.
    3. Otherwise ``'utf-8'`` is returned as the default.

    :param fileid: Either a ``PathPointer`` (read through its own
        ``open()`` method) or a filesystem path accepted by the
        builtin ``open()``.
    :return: The name of the detected encoding.
    :rtype: str
    """
    if isinstance(fileid, PathPointer):
        # Assumes PathPointer.open() returns a binary file-like object
        # providing readline() and close() -- TODO confirm for every
        # PathPointer subclass.  Close it explicitly so the handle is
        # not leaked.
        stream = fileid.open()
        try:
            first_line = stream.readline()
        finally:
            stream.close()
    else:
        with open(fileid, 'rb') as stream:
            first_line = stream.readline()

    # Order matters: the UTF-32-LE BOM (FF FE 00 00) starts with the
    # UTF-16-LE BOM (FF FE), so the UTF-32 marks must be tested first
    # or a UTF-32-LE file would be reported as UTF-16-LE.
    bom_encodings = (
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
        (codecs.BOM_UTF8, 'utf-8'),
    )
    for bom, encoding in bom_encodings:
        if first_line.startswith(bom):
            return encoding

    # No BOM: look for an explicit encoding in the XML declaration.
    # The double-quoted form is tried before the single-quoted form.
    declaration_patterns = (
        br'\s*<\?xml\b.*\bencoding="([^"]+)"',
        br"\s*<\?xml\b.*\bencoding='([^']+)'",
    )
    for pattern in declaration_patterns:
        m = re.match(pattern, first_line)
        if m:
            return m.group(1).decode()

    # No encoding found -- fall back to UTF-8.
    return 'utf-8'
评论列表
文章目录