def uppercase_escape(s):
    r"""Expand ``\UXXXXXXXX`` escape sequences found in *s*.

    Every backslash + capital ``U`` + exactly eight hex digits is passed
    through the ``unicode_escape`` decoder and replaced by the decoded text.
    All other parts of *s* are returned unchanged.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed_length); only the text is used.
        decoded, _consumed = decode_escape(match.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
python类getdecoder()的实例源码
def lowercase_escape(s):
    r"""Expand ``\uXXXX`` escape sequences found in *s*.

    Every backslash + lowercase ``u`` + exactly four hex digits is passed
    through the ``unicode_escape`` decoder and replaced by the decoded text.
    All other parts of *s* are returned unchanged.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed_length); only the text is used.
        decoded, _consumed = decode_escape(match.group(0))
        return decoded

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
def uppercase_escape(s):
    r"""Return *s* with each ``\UXXXXXXXX`` escape decoded.

    A match is a backslash, a capital ``U`` and exactly eight hex digits.
    The matched text is handed to the ``unicode_escape`` decoder and its
    decoded output takes the place of the escape.
    """
    decoder = codecs.getdecoder('unicode_escape')
    long_escape = re.compile(r'\\U[0-9a-fA-F]{8}')

    def replacement(found):
        # decoder() yields a (text, length) pair; the text is the substitute.
        return decoder(found.group(0))[0]

    return long_escape.sub(replacement, s)
def lowercase_escape(s):
    r"""Return *s* with each ``\uXXXX`` escape decoded.

    A match is a backslash, a lowercase ``u`` and exactly four hex digits.
    The matched text is handed to the ``unicode_escape`` decoder and its
    decoded output takes the place of the escape.
    """
    decoder = codecs.getdecoder('unicode_escape')
    short_escape = re.compile(r'\\u[0-9a-fA-F]{4}')

    def replacement(found):
        # decoder() yields a (text, length) pair; the text is the substitute.
        return decoder(found.group(0))[0]

    return short_escape.sub(replacement, s)
def test_decode_callback(self):
    # Registers codecs.ignore_errors under the name "UnicodeInternalTest",
    # then decodes "ab" (in its unicode_internal form) with four 0x22 bytes
    # spliced in after the first four characters, and expects ("ab", 12).
    # NOTE(review): SOURCE lost its indentation; the final assertion is
    # placed after the ``with`` block here -- confirm against upstream.
    codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
    decode_internal = codecs.getdecoder("unicode_internal")
    expected_warning = ('unicode_internal codec has been '
                        'deprecated', DeprecationWarning)
    with support.check_warnings(expected_warning):
        ab = "ab".encode("unicode_internal").decode()
        payload = bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), "ascii")
        ignored = decode_internal(payload, "UnicodeInternalTest")
    self.assertEqual(("ab", 12), ignored)
def test_getdecoder(self):
self.assertRaises(TypeError, codecs.getdecoder)
self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
def test_bad_decode_args(self):
    # For each name in all_unicode_encodings: calling the decoder with no
    # arguments must raise TypeError, and so must calling it with the int 42.
    # The int check is skipped for "idna" and "punycode".
    for codec_name in all_unicode_encodings:
        decode = codecs.getdecoder(codec_name)
        with self.assertRaises(TypeError):
            decode()
        if codec_name in ("idna", "punycode"):
            continue
        with self.assertRaises(TypeError):
            decode(42)
def test_basics(self):
    # Round-trips all 256 byte values through every codec named in
    # bytes_transform_encodings using the getencoder/getdecoder interface,
    # checking the reported consumed sizes and the decoded result.
    payload = bytes(range(256))
    for codec_name in bytes_transform_encodings:
        # generic codecs interface
        encode = codecs.getencoder(codec_name)
        encoded, consumed = encode(payload)
        self.assertEqual(consumed, len(payload))
        decode = codecs.getdecoder(codec_name)
        decoded, consumed = decode(encoded)
        self.assertEqual(consumed, len(encoded))
        self.assertEqual(decoded, payload)
def test_errorcallback_longindex(self):
dec = codecs.getdecoder('euc-kr')
myreplace = lambda exc: ('', sys.maxsize+1)
codecs.register_error('test.cjktest', myreplace)
self.assertRaises(IndexError, dec,
b'apple\x92ham\x93spam', 'test.cjktest')
translate.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 35
收藏 0
点赞 0
评论 0
def _unescape(text):
"""Unescape unicode character codes within a string.
"""
pattern = r'\\{1,2}u[0-9a-fA-F]{4}'
decode = lambda x: codecs.getdecoder('unicode_escape')(x.group())[0]
return re.sub(pattern, decode, text)
def codepage(self, codepage):
    """Apply *codepage* to this object's metadata and header.

    Raises TypeError when *codepage* is not a CodePage instance, and
    DbfError when ``self._meta.status`` is not READ_WRITE. Otherwise the
    header receives ``codepage.code``, the metadata's decoder and encoder
    are replaced with the codecs for ``codepage.name``, and
    ``_update_disk(headeronly=True)`` is called.
    """
    if not isinstance(codepage, CodePage):
        message = "codepage should be a CodePage, not a %r" % type(codepage)
        raise TypeError(message)
    table_meta = self._meta
    if table_meta.status != READ_WRITE:
        message = ('%s not in read/write mode, unable to change codepage'
                   % table_meta.filename)
        raise DbfError(message)
    table_meta.header.codepage(codepage.code)
    # Keep decoder and encoder in step with the newly selected code page.
    table_meta.decoder = codecs.getdecoder(codepage.name)
    table_meta.encoder = codecs.getencoder(codepage.name)
    self._update_disk(headeronly=True)
def codepage(self, codepage):
    """Switch this object's metadata and header over to *codepage*.

    A non-CodePage argument raises TypeError. A ``self._meta.status`` other
    than READ_WRITE raises DbfError. On success ``codepage.code`` is handed
    to the header, the decoder/encoder pair for ``codepage.name`` is stored
    on the metadata, and ``_update_disk(headeronly=True)`` is invoked.
    """
    if not isinstance(codepage, CodePage):
        raise TypeError(
            "codepage should be a CodePage, not a %r" % type(codepage)
        )
    info = self._meta
    if info.status != READ_WRITE:
        raise DbfError(
            '%s not in read/write mode, unable to change codepage'
            % info.filename
        )
    info.header.codepage(codepage.code)
    # Swap in the codec functions that belong to the new code page.
    info.decoder = codecs.getdecoder(codepage.name)
    info.encoder = codecs.getencoder(codepage.name)
    self._update_disk(headeronly=True)
def codepage(self, codepage):
    """Change the code page recorded for this object to *codepage*.

    Raises:
        TypeError: *codepage* is not a CodePage instance.
        DbfError: ``self._meta.status`` is not READ_WRITE.

    After validation the header is given ``codepage.code``, the metadata's
    ``decoder`` and ``encoder`` are set from ``codepage.name``, and
    ``_update_disk(headeronly=True)`` is called.
    """
    if not isinstance(codepage, CodePage):
        wrong_type = type(codepage)
        raise TypeError("codepage should be a CodePage, not a %r" % wrong_type)
    metadata = self._meta
    if metadata.status != READ_WRITE:
        source_name = metadata.filename
        raise DbfError('%s not in read/write mode, unable to change codepage' % source_name)
    metadata.header.codepage(codepage.code)
    # Decoder first, then encoder, both looked up by the code page's name.
    metadata.decoder = codecs.getdecoder(codepage.name)
    metadata.encoder = codecs.getencoder(codepage.name)
    self._update_disk(headeronly=True)
def test_nonascii_text_cptrans(self):
    "check non-ascii text to unicode"
    # Builds a cp437 table with on_disk=False, appends one record whose
    # fields both hold the cp437 decoding of byte values 128..177, and
    # checks that both fields read back equal to that text.
    table = Table(
        ':memory:',
        'data C(50); memo M',
        codepage='cp437',
        dbf_type='vfp',
        on_disk=False,
    )
    table.open()
    decoder = codecs.getdecoder('cp437')
    first_code, how_many = 128, 50
    if py_ver < (3, 0):
        raw_text = ''.join(map(chr, range(first_code, first_code + how_many)))
        high_ascii = decoder(raw_text)[0]
    else:
        raw_bytes = bytes(range(first_code, first_code + how_many))
        high_ascii = raw_bytes.decode('cp437')
    table.append({'data': high_ascii, 'memo': high_ascii})
    self.assertEqual(table[0].data, high_ascii)
    self.assertEqual(table[0].memo, high_ascii)
    table.close()
def uppercase_escape(s):
    r"""Decode the ``\UXXXXXXXX`` escapes embedded in *s*.

    Only a backslash followed by capital ``U`` and exactly eight hex digits
    is touched; each such span is swapped for the text produced by the
    ``unicode_escape`` decoder.
    """
    to_text = codecs.getdecoder('unicode_escape')

    def _substitute(hit):
        escaped = hit.group(0)
        # First element of the decoder's result is the decoded text.
        return to_text(escaped)[0]

    pattern = r'\\U[0-9a-fA-F]{8}'
    return re.sub(pattern, _substitute, s)
def lowercase_escape(s):
    r"""Decode the ``\uXXXX`` escapes embedded in *s*.

    Only a backslash followed by lowercase ``u`` and exactly four hex digits
    is touched; each such span is swapped for the text produced by the
    ``unicode_escape`` decoder.
    """
    to_text = codecs.getdecoder('unicode_escape')

    def _substitute(hit):
        escaped = hit.group(0)
        # First element of the decoder's result is the decoded text.
        return to_text(escaped)[0]

    pattern = r'\\u[0-9a-fA-F]{4}'
    return re.sub(pattern, _substitute, s)
def _unescape(text):
"""Unescape unicode character codes within a string.
"""
pattern = r'\\{1,2}u[0-9a-fA-F]{4}'
decode = lambda x: codecs.getdecoder('unicode_escape')(x.group())[0]
return re.sub(pattern, decode, text)
def test_decode_callback(self):
if sys.maxunicode > 0xffff:
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
ab = u"ab".encode("unicode_internal")
ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"UnicodeInternalTest")
self.assertEqual((u"ab", 12), ignored)
def test_getdecoder(self):
self.assertRaises(TypeError, codecs.getdecoder)
self.assertRaises(LookupError, codecs.getdecoder, "__spam__")