def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
python类utf_8_decode()的实例源码
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = ''
self.pointer = 0
self.full_buffer = unicode('')
self.full_pointer = 0
self.raw_buffer = None
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.index = 0
self.line = 0
self.column = 0
self.stream = stream
self.name = getattr(stream, 'name', '<file>')
self.eof = False
self.raw_buffer = None
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
self.update(1)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def test_decode_unicode(self):
# Most decoders don't accept unicode input
decoders = [
codecs.utf_7_decode,
codecs.utf_8_decode,
codecs.utf_16_le_decode,
codecs.utf_16_be_decode,
codecs.utf_16_ex_decode,
codecs.utf_32_decode,
codecs.utf_32_le_decode,
codecs.utf_32_be_decode,
codecs.utf_32_ex_decode,
codecs.latin_1_decode,
codecs.ascii_decode,
codecs.charmap_decode,
]
if hasattr(codecs, "mbcs_decode"):
decoders.append(codecs.mbcs_decode)
for decoder in decoders:
self.assertRaises(TypeError, decoder, "xxx")
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def determine_encoding(self):
# type: () -> None
while not self.eof and (self.raw_buffer is None or
len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, binary_type):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode # type: ignore
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode # type: ignore
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode # type: ignore
self.encoding = 'utf-8'
self.update(1)
# 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56)
def test_decode_unicode(self):
# Most decoders don't accept unicode input
decoders = [
codecs.utf_7_decode,
codecs.utf_8_decode,
codecs.utf_16_le_decode,
codecs.utf_16_be_decode,
codecs.utf_16_ex_decode,
codecs.utf_32_decode,
codecs.utf_32_le_decode,
codecs.utf_32_be_decode,
codecs.utf_32_ex_decode,
codecs.latin_1_decode,
codecs.ascii_decode,
codecs.charmap_decode,
]
if hasattr(codecs, "mbcs_decode"):
decoders.append(codecs.mbcs_decode)
for decoder in decoders:
self.assertRaises(TypeError, decoder, "xxx")
def send_mail(json_string):
# Extract sender and subject
json_blob = json.loads(json_string)
sender = json_blob['headers']['From']
sender = re.sub('^.*\<', '', sender)
EMAIL_TO = re.sub('\>.*$', '', sender)
if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) for '+EMAIL_TO)
subj = common_functions.extract_subject(json_blob['headers'])
if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+subj)
#SUBJECT = 'Extracted IOCs for: '+subj.decode("utf-8", "ignore")
SUBJECT = 'Extracted IOCs for: '+str(codecs.utf_8_decode(subj.encode('utf8'))[0])
if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+SUBJECT)
msg = MIMEText(json2string(json_string), _charset='utf-8')
msg['Subject'] = SUBJECT
msg['From'] = EMAIL_FROM
msg['To'] = EMAIL_TO
if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) msg composed ')
server = smtplib.SMTP(EMAIL_SERVER)
server.sendmail(EMAIL_FROM, EMAIL_TO, msg.as_string())
if BB_DEBUG: syslog.syslog(syslog.LOG_ERR, 'Finished')
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def test_decode_unicode(self):
# Most decoders don't accept unicode input
decoders = [
codecs.utf_7_decode,
codecs.utf_8_decode,
codecs.utf_16_le_decode,
codecs.utf_16_be_decode,
codecs.utf_16_ex_decode,
codecs.utf_32_decode,
codecs.utf_32_le_decode,
codecs.utf_32_be_decode,
codecs.utf_32_ex_decode,
codecs.latin_1_decode,
codecs.ascii_decode,
codecs.charmap_decode,
]
if hasattr(codecs, "mbcs_decode"):
decoders.append(codecs.mbcs_decode)
for decoder in decoders:
self.assertRaises(TypeError, decoder, "xxx")
def decode(input, errors='strict'):
return codecs.utf_8_decode(input, errors, True)
def decode(input, errors='strict'):
prefix = 0
if input[:3] == codecs.BOM_UTF8:
input = input[3:]
prefix = 3
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
def decode(self, input, errors='strict'):
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this is a BOM
# => try again on the next call
return ("", 0)
elif input[:3] == codecs.BOM_UTF8:
self.decode = codecs.utf_8_decode
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
return (output, consumed+3)
# (else) no BOM present
self.decode = codecs.utf_8_decode
return codecs.utf_8_decode(input, errors)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_8_decode(input, errors, True)
def decode(input, errors='strict'):
prefix = 0
if input[:3] == codecs.BOM_UTF8:
input = input[3:]
prefix = 3
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return (u"", 0)
else:
self.first = None
else:
self.first = None
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def decode(self, input, errors='strict'):
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this is a BOM
# => try again on the next call
return (u"", 0)
elif input[:3] == codecs.BOM_UTF8:
self.decode = codecs.utf_8_decode
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
return (output, consumed+3)
# (else) no BOM present
self.decode = codecs.utf_8_decode
return codecs.utf_8_decode(input, errors)
### encodings module API
def decode(input, errors='strict'):
prefix = 0
if input[:3] == codecs.BOM_UTF8:
input = input[3:]
prefix = 3
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return (u"", 0)
else:
self.first = None
else:
self.first = None
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def decode(self, input, errors='strict'):
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this is a BOM
# => try again on the next call
return (u"", 0)
elif input[:3] == codecs.BOM_UTF8:
self.decode = codecs.utf_8_decode
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
return (output, consumed+3)
# (else) no BOM present
self.decode = codecs.utf_8_decode
return codecs.utf_8_decode(input, errors)
### encodings module API
def decode(input, errors='strict'):
return codecs.utf_8_decode(input, errors, True)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
def decode(input, errors='strict'):
return codecs.utf_8_decode(input, errors, True)
def decode(input, errors='strict'):
prefix = 0
if input[:3] == codecs.BOM_UTF8:
input = input[3:]
prefix = 3
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)