def test_all(self):
api = (
"encode", "decode",
"register", "CodecInfo", "Codec", "IncrementalEncoder",
"IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
"getencoder", "getdecoder", "getincrementalencoder",
"getincrementaldecoder", "getreader", "getwriter",
"register_error", "lookup_error",
"strict_errors", "replace_errors", "ignore_errors",
"xmlcharrefreplace_errors", "backslashreplace_errors",
"open", "EncodedFile",
"iterencode", "iterdecode",
"BOM", "BOM_BE", "BOM_LE",
"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
"BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
"BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", # Undocumented
"StreamReaderWriter", "StreamRecoder",
)
self.assertEqual(sorted(api), sorted(codecs.__all__))
for api in codecs.__all__:
getattr(codecs, api)
# Source code examples for Python's ignore_errors()
def test_fake_error_class(self):
handlers = [
codecs.strict_errors,
codecs.ignore_errors,
codecs.replace_errors,
codecs.backslashreplace_errors,
codecs.xmlcharrefreplace_errors,
]
for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError:
class FakeUnicodeError(str):
__class__ = cls
for handler in handlers:
self.assertRaises(TypeError, handler, FakeUnicodeError())
class FakeUnicodeError(Exception):
__class__ = cls
for handler in handlers:
with self.assertRaises((TypeError, FakeUnicodeError)):
handler(FakeUnicodeError())
def test_all(self):
api = (
"encode", "decode",
"register", "CodecInfo", "Codec", "IncrementalEncoder",
"IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
"getencoder", "getdecoder", "getincrementalencoder",
"getincrementaldecoder", "getreader", "getwriter",
"register_error", "lookup_error",
"strict_errors", "replace_errors", "ignore_errors",
"xmlcharrefreplace_errors", "backslashreplace_errors",
"open", "EncodedFile",
"iterencode", "iterdecode",
"BOM", "BOM_BE", "BOM_LE",
"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
"BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
"BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", # Undocumented
"StreamReaderWriter", "StreamRecoder",
)
self.assertEqual(sorted(api), sorted(codecs.__all__))
for api in codecs.__all__:
getattr(codecs, api)
def test_fake_error_class(self):
handlers = [
codecs.strict_errors,
codecs.ignore_errors,
codecs.replace_errors,
codecs.backslashreplace_errors,
codecs.xmlcharrefreplace_errors,
]
for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError:
class FakeUnicodeError(str):
__class__ = cls
for handler in handlers:
self.assertRaises(TypeError, handler, FakeUnicodeError())
class FakeUnicodeError(Exception):
__class__ = cls
for handler in handlers:
with self.assertRaises((TypeError, FakeUnicodeError)):
handler(FakeUnicodeError())
def test_all(self):
api = (
"encode", "decode",
"register", "CodecInfo", "Codec", "IncrementalEncoder",
"IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
"getencoder", "getdecoder", "getincrementalencoder",
"getincrementaldecoder", "getreader", "getwriter",
"register_error", "lookup_error",
"strict_errors", "replace_errors", "ignore_errors",
"xmlcharrefreplace_errors", "backslashreplace_errors",
"open", "EncodedFile",
"iterencode", "iterdecode",
"BOM", "BOM_BE", "BOM_LE",
"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
"BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
"BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", # Undocumented
"StreamReaderWriter", "StreamRecoder",
)
self.assertCountEqual(api, codecs.__all__)
for api in codecs.__all__:
getattr(codecs, api)
def test_fake_error_class(self):
handlers = [
codecs.strict_errors,
codecs.ignore_errors,
codecs.replace_errors,
codecs.backslashreplace_errors,
codecs.xmlcharrefreplace_errors,
codecs.lookup_error('surrogateescape'),
codecs.lookup_error('surrogatepass'),
]
for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError:
class FakeUnicodeError(str):
__class__ = cls
for handler in handlers:
with self.subTest(handler=handler, error_class=cls):
self.assertRaises(TypeError, handler, FakeUnicodeError())
class FakeUnicodeError(Exception):
__class__ = cls
for handler in handlers:
with self.subTest(handler=handler, error_class=cls):
with self.assertRaises((TypeError, FakeUnicodeError)):
handler(FakeUnicodeError())
def create_fb_format(lines_file, convo_file, outpath):
    """Write numbered-turn dialogue files split into train/valid/test.

    ``lines_file`` is read line by line; each line is split on
    ``' +++$+++ '``, field 0 becomes the lookup key and fields 4 onward
    (re-joined with spaces, newline stripped, tabs replaced by spaces)
    become the text stored for that key.

    ``convo_file`` is read line by line; each line is split on single
    spaces and everything from field 6 onward is treated as a bracketed,
    quoted, comma-separated list of keys. The keys are paired up two at a
    time and rendered as ``"<n> <first>\\t<second>\\n"`` (the tab and second
    text are omitted for a trailing unpaired key). One extra newline ends
    each conversation.

    Conversations are counted from 1. A count ending in 0 goes to
    ``test.txt``, one ending in 1 goes to ``valid.txt`` and the rest go to
    ``train.txt``, all inside ``outpath``.

    Args:
        lines_file: path of the file mapping keys to utterance text.
        convo_file: path of the file listing the keys of each conversation.
        outpath: existing directory that receives the three output files.

    Raises:
        KeyError: if a conversation references a key absent from
            ``lines_file``.
    """
    print('[building fbformat]')
    # Context managers guarantee the three outputs are closed even when an
    # exception (for example a KeyError on an unknown key) aborts the run.
    with open(os.path.join(outpath, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(outpath, 'valid.txt'), 'w') as fvalid, \
            open(os.path.join(outpath, 'test.txt'), 'w') as ftest:
        lines = {}

        # NOTE(review): this rebinds the process-wide 'strict' error handler
        # name to the "ignore" handler, presumably so undecodable bytes are
        # skipped while reading. Kept because callers may rely on the side
        # effect; confirm it is still wanted.
        codecs.register_error('strict', codecs.ignore_errors)
        with codecs.open(lines_file, 'r') as f:
            for line in f:
                fields = line.split(' +++$+++ ')
                text = ' '.join(fields[4:]).strip('\n').replace('\t', ' ')
                lines[fields[0]] = text

        cnt = 0
        with codecs.open(convo_file, 'r') as f:
            for line in f:
                parts = line.split(' ')
                convo = ' '.join(parts[6:]).strip('\n').strip('[').strip(']')
                ids = convo.replace("'", '').replace(' ', '').split(',')

                # Collect the turns and join once instead of growing a
                # string piece by piece.
                turns = []
                for index, i in enumerate(range(0, len(ids), 2), 1):
                    turn = str(index) + ' ' + lines[ids[i]]
                    if len(ids) > i + 1:
                        turn += '\t' + lines[ids[i + 1]]
                    turns.append(turn + '\n')

                cnt += 1
                bucket = cnt % 10
                if bucket == 1:
                    handle = fvalid
                elif bucket == 0:
                    handle = ftest
                else:
                    handle = ftrain
                handle.write(''.join(turns) + '\n')
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
("", 1)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = u"ab".encode("unicode_internal")
ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"UnicodeInternalTest")
self.assertEqual((u"ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", u"a\u3042b", 1, 2, "ouch")),
(u"", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", "a\xffb", 1, 2, "ouch")),
(u"", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError(u"a\u3042b", 1, 2, "ouch")),
(u"", 2)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = u"ab".encode("unicode_internal")
ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"UnicodeInternalTest")
self.assertEqual((u"ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", u"a\u3042b", 1, 2, "ouch")),
(u"", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", "a\xffb", 1, 2, "ouch")),
(u"", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError(u"a\u3042b", 1, 2, "ouch")),
(u"", 2)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
("", 1)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = u"ab".encode("unicode_internal")
ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"UnicodeInternalTest")
self.assertEqual((u"ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
self.assertEqual(
codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
(u"", 1)
)
self.assertEqual(
codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")),
("", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")),
("", 2)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError("a\u3042b", 1, 2, "ouch")),
("", 2)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = u"ab".encode("unicode_internal")
ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"UnicodeInternalTest")
self.assertEqual((u"ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
self.assertEqual(
codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
(u"", 1)
)
self.assertEqual(
codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEqual(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEqual(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_decode_callback(self):
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)
def test_badandgoodignoreexceptions(self):
# "ignore" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
# "ignore" complains about the wrong exception type
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEqual(
codecs.ignore_errors(
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
("", 1)
)
self.assertEqual(
codecs.ignore_errors(
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
("", 1)
)