def test_ascii_letters(self):
    """Round-trip every lowercase ASCII letter through lookup() and name().

    For each letter the name ``LATIN SMALL LETTER <X>`` is looked up, and
    the name reported for the resulting character must equal the name that
    was looked up.
    """
    import unicodedata

    # The stop value of xrange() is exclusive, so ord("z") + 1 is required
    # for the letter "z" to be covered as well.
    for code_point in xrange(ord("a"), ord("z") + 1):
        name = "LATIN SMALL LETTER %s" % chr(code_point).upper()
        char = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(char), name)
python类lookup()的实例源码
def test_bmp_characters(self):
    """Check that lookup(name(ch)) == ch for every named code point below 0x10000.

    Code points for which unicodedata.name() yields the ``None`` default
    (i.e. that have no name) are skipped.
    """
    import unicodedata

    for code in xrange(0x10000):
        char = unichr(code)
        # Passing a default keeps name() from raising for unnamed code points.
        name = unicodedata.name(char, None)
        if name is None:
            continue
        self.assertEqual(unicodedata.lookup(name), char)
def test_errors(self):
    """Check the exception types raised by name() and lookup() on bad calls."""
    import unicodedata

    # name(): called with no argument, then with a two-character string.
    with self.assertRaises(TypeError):
        unicodedata.name()
    with self.assertRaises(TypeError):
        unicodedata.name(u'xx')

    # lookup(): called with no argument, then with a name that does not exist.
    with self.assertRaises(TypeError):
        unicodedata.lookup()
    with self.assertRaises(KeyError):
        unicodedata.lookup(u'unknown')
def test_ascii_letters(self):
    """Verify name(lookup(n)) == n for the names of all lowercase ASCII letters.

    The names have the form ``LATIN SMALL LETTER <X>`` where ``<X>`` is the
    upper-cased letter.
    """
    import unicodedata

    # xrange() excludes its stop value; the + 1 makes sure "z" is tested too.
    letters = [chr(code) for code in xrange(ord("a"), ord("z") + 1)]
    for letter in letters:
        name = "LATIN SMALL LETTER %s" % letter.upper()
        found = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(found), name)
def test_bmp_characters(self):
    """Verify that every named code point in range(0x10000) round-trips.

    For each code point that has a name, looking that name up must return
    the original character. Code points without a name are ignored.
    """
    import unicodedata

    for code in xrange(0x10000):
        char = unichr(code)
        # None is the sentinel default returned when the character is unnamed.
        name = unicodedata.name(char, None)
        if name is not None:
            self.assertEqual(unicodedata.lookup(name), char)
def test_errors(self):
    """Assert that invalid calls to name() and lookup() raise as expected."""
    import unicodedata

    # Each entry: (expected exception, callable, positional arguments).
    bad_calls = (
        (TypeError, unicodedata.name, ()),
        (TypeError, unicodedata.name, (u'xx',)),
        (TypeError, unicodedata.lookup, ()),
        (KeyError, unicodedata.lookup, (u'unknown',)),
    )
    for expected_error, func, args in bad_calls:
        self.assertRaises(expected_error, func, *args)
def cmd_do(expr, msg=None):
    """Turn a short command into an emoticon, a character, or character names.

    Resolution order:

    1. If the lower-cased ``expr`` is a key of the action table, return the
       associated text.
    2. If it equals ``'help'``, return the action names joined by ``', '``.
    3. If it is a Unicode character name, return that character.
    4. If it is at most 10 characters long, return the Unicode names of the
       characters of the original (not lower-cased) ``expr`` joined by
       ``', '``. Characters that have no name (for example control
       characters) are rendered as ``U+XXXX`` instead of raising ValueError.
    5. Otherwise return ``'Something happened.'``.

    ``msg`` is accepted but not used by this function.
    """
    # NOTE(review): the runs of '?' in the literals below look like
    # characters lost to a bad encoding step upstream -- confirm against the
    # original source. They are kept exactly as found.
    actions = collections.OrderedDict((
        ('shrug', '¯\\_(?)_/¯'),
        ('lenny', '( ?° ?? ?°)'),
        ('flip', '??°?°??? ???'),
        ('homo', '?????o???'),
        ('look', '?_?'),
        ('cn', '[citation needed]'),
        ('boom', '??'),
        ('tweet', '??'),
        ('blink', '??'),
        ('see-no-evil', '??'),
        ('hear-no-evil', '??'),
        ('speak-no-evil', '??'),
        ('evil', '??????'),
        ('table', '(?>_<)?</?lq??>'),
        ('release-upgrade', '????'),
        ('however', ('???????????\n??????????\n'
                     'Something happened\n???????\n'
                     '?????????????\n???????\n???????')),
        ('mac', ('?????\n????\n???????\n????\n'
                 '?????\n??????\n??\n????'))
    ))
    origexpr = expr
    expr = expr.lower()

    res = actions.get(expr)
    if res:
        return res
    if expr == 'help':
        return ', '.join(actions.keys())

    try:
        return unicodedata.lookup(expr)
    except KeyError:
        # Not a character name; fall through to describing the characters.
        pass

    if len(expr) > 10:
        return 'Something happened.'
    # unicodedata.name() raises ValueError for unnamed characters unless a
    # default is supplied, so fall back to the code point in U+XXXX form.
    return ', '.join(
        unicodedata.name(ch, 'U+%04X' % ord(ch)) for ch in origexpr
    )
def _get_base_character(c):
desc = unicodedata.name(unicode(c))
cutoff = desc.find(' WITH ')
if cutoff != -1:
desc = desc[:cutoff]
return unicodedata.lookup(desc)
def test_ascii_letters(self):
    """Round-trip every lowercase ASCII letter through lookup() and name().

    For each letter the name ``LATIN SMALL LETTER <X>`` is looked up, and
    the name reported for the resulting character must equal the name that
    was looked up.
    """
    # range() excludes its stop value, so ord("z") + 1 is needed for the
    # letter "z" to be included in the check.
    for code_point in range(ord("a"), ord("z") + 1):
        name = "LATIN SMALL LETTER %s" % chr(code_point).upper()
        char = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(char), name)
def test_bmp_characters(self):
    """Check that lookup(name(ch)) == ch for every named code point below 0x10000."""
    for codepoint in range(0x10000):
        ch = chr(codepoint)
        # The None default is returned for code points that have no name.
        ch_name = unicodedata.name(ch, None)
        if ch_name is None:
            continue
        self.assertEqual(unicodedata.lookup(ch_name), ch)
def test_named_sequences_sample(self):
    """Spot-check a handful of named sequences (see #12753)."""
    samples = (
        ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'),
        ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'),
        ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'),
        ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'),
        ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'),
    )
    for sequence_name, expected in samples:
        # lookup() must return the full code point sequence for the name.
        resolved = unicodedata.lookup(sequence_name)
        self.assertEqual(resolved, expected)
        # The checkletter helper must raise SyntaxError for the same name.
        with self.assertRaises(SyntaxError):
            self.checkletter(sequence_name, None)
        # The 3.2.0 database object must raise KeyError for the name.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(sequence_name)
test_reshape.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def test_unicode(self):
    """get_dummies must cope with non-ASCII values (see GH 6885)."""
    import unicodedata

    plain_e = 'e'
    e_acute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, e_acute, e_acute]
    result = get_dummies(values, prefix='letter', sparse=self.sparse)

    # One indicator column per distinct value, keyed by row position.
    accented_column = u('letter_%s') % e_acute
    expected = DataFrame({
        'letter_e': {0: 1.0, 1: 0.0, 2: 0.0},
        accented_column: {0: 0.0, 1: 1.0, 2: 1.0},
    })
    assert_frame_equal(result, expected)
def normalize_char(c):
    """Return the character named by the part of c's name before ' WITH'.

    ``c`` itself is returned unchanged when its Unicode name does not
    contain ``' WITH'``, or when a ValueError or KeyError is raised while
    resolving the name or looking up the shortened name.
    """
    try:
        full_name = unicodedata.name(unicode(c))
        base_name, marker, _ = full_name.partition(' WITH')
        if not marker:
            # No ' WITH' in the name: nothing to strip.
            return c
        return unicodedata.lookup(base_name)
    except (ValueError, KeyError):
        return c
def unicode(self, name):
    """Return whatever ``lookup(name)`` returns.

    NOTE(review): ``lookup`` is resolved from the enclosing module; it is
    presumably ``unicodedata.lookup`` -- confirm against the file's imports.
    """
    result = lookup(name)
    return result
# Safe, fast math parser
def test_ascii_letters(self):
    """Check that each lowercase ASCII letter's name survives a lookup/name round trip.

    The name ``LATIN SMALL LETTER <X>`` is built from the upper-cased
    letter, resolved with lookup(), and compared with name() of the result.
    """
    import unicodedata

    # + 1 because xrange() stops before its upper bound; otherwise "z" is
    # silently left out of the test.
    for code_point in xrange(ord("a"), ord("z") + 1):
        letter = chr(code_point)
        name = "LATIN SMALL LETTER %s" % letter.upper()
        resolved = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(resolved), name)
def test_bmp_characters(self):
    """Round-trip every named code point in range(0x10000) through name() and lookup().

    Code points for which name() returns the ``None`` default are skipped.
    """
    import unicodedata

    for code in xrange(0x10000):
        char = unichr(code)
        # Supplying a default avoids an exception for unnamed code points.
        name = unicodedata.name(char, None)
        if name is None:
            continue
        self.assertEqual(unicodedata.lookup(name), char)
def test_errors(self):
    """Verify the errors raised by name() and lookup() for invalid calls."""
    import unicodedata

    lookup_name = unicodedata.name
    lookup_char = unicodedata.lookup

    # Missing argument and a two-character string are both TypeErrors for name().
    with self.assertRaises(TypeError):
        lookup_name()
    with self.assertRaises(TypeError):
        lookup_name(u'xx')

    # lookup() needs an argument, and rejects a nonexistent name with KeyError.
    with self.assertRaises(TypeError):
        lookup_char()
    with self.assertRaises(KeyError):
        lookup_char(u'unknown')
def test_ascii_letters(self):
    """Verify name(lookup(n)) == n for the names of all lowercase ASCII letters.

    The names have the form ``LATIN SMALL LETTER <X>`` where ``<X>`` is the
    upper-cased letter.
    """
    # range() excludes its stop value; the + 1 makes sure "z" is tested too.
    letters = [chr(code) for code in range(ord("a"), ord("z") + 1)]
    for letter in letters:
        name = "LATIN SMALL LETTER %s" % letter.upper()
        found = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(found), name)
def test_bmp_characters(self):
    """Verify that every named code point in range(0x10000) round-trips via its name."""
    for ordinal in range(0x10000):
        character = chr(ordinal)
        # name() hands back the None default when the character is unnamed.
        character_name = unicodedata.name(character, None)
        if character_name is None:
            continue
        looked_up = unicodedata.lookup(character_name)
        self.assertEqual(looked_up, character)
def test_named_sequences_sample(self):
    """Check a small sample of named sequences (see #12753)."""
    expected_by_name = (
        ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'),
        ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'),
        ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'),
        ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'),
        ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'),
    )
    for name, sequence in expected_by_name:
        # The name must resolve to the complete code point sequence.
        self.assertEqual(unicodedata.lookup(name), sequence)
        # self.checkletter(name, None) is required to raise SyntaxError.
        with self.assertRaises(SyntaxError):
            self.checkletter(name, None)
        # Looking the name up in ucd_3_2_0 is required to raise KeyError.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(name)