def unichr(ch):
    """Return the text for code point ``ch``, even above ``sys.maxunicode``.

    Code points that fit within ``sys.maxunicode`` are passed straight to
    ``_unichr`` (presumably the interpreter's own ``unichr``/``chr``; it is
    defined elsewhere in this file).  Larger code points are split into a
    UTF-16 surrogate pair, each half built with ``_unichr``.

    Args:
        ch: Integer Unicode code point.

    Returns:
        A one-character string, or a two-character surrogate pair when
        ``ch`` exceeds ``sys.maxunicode``.

    Raises:
        ValueError: If ``ch`` is above 0x10FFFF, the last Unicode code point.
    """
    # Beyond U+10FFFF the arithmetic below would yield a "high" half outside
    # D800-DBFF, i.e. a silently corrupt pair; fail loudly instead.
    if ch > 0x10FFFF:
        raise ValueError("unichr() arg not in range(0x110000)")
    if ch <= sys.maxunicode:
        return _unichr(ch)
    offset = ch - 0x10000
    high = 0xD800 + (offset >> 10)    # top 10 bits of the offset
    low = 0xDC00 + (offset & 0x3FF)   # bottom 10 bits of the offset
    return _unichr(high) + _unichr(low)
python类unichr()的实例源码
def surrogate_escape(error):
    """
    Simulate the Python 3 ``surrogateescape`` handler, but for Python 2 only.

    Every byte in the failing range ``error.object[error.start:error.end]``
    is mapped to the code point ``0xDC00 + byte``, so ranges longer than one
    byte are escaped byte by byte instead of being rejected.

    Args:
        error: The exception handed to the codec error handler; its
            ``object``, ``start`` and ``end`` attributes are read.

    Returns:
        A ``(replacement, error.end)`` tuple, where ``replacement`` holds one
        escaped character per undecodable byte.

    Raises:
        The ``error`` passed in, when the failing range is empty.
    """
    chars = error.object[error.start:error.end]
    if not chars:
        # Nothing to escape: propagate the original error rather than
        # returning an empty replacement.
        raise error
    escaped = [__builtin__.unichr(0xdc00 + ord(char)) for char in chars]
    return u"".join(escaped), error.end
def surrogate_escape(error):
    """
    Simulate the Python 3 ``surrogateescape`` handler, but for Python 2 only.

    Every byte in the failing range ``error.object[error.start:error.end]``
    is mapped to the code point ``0xDC00 + byte``, so ranges longer than one
    byte are escaped byte by byte instead of being rejected.

    Args:
        error: The exception handed to the codec error handler; its
            ``object``, ``start`` and ``end`` attributes are read.

    Returns:
        A ``(replacement, error.end)`` tuple, where ``replacement`` holds one
        escaped character per undecodable byte.

    Raises:
        The ``error`` passed in, when the failing range is empty.
    """
    chars = error.object[error.start:error.end]
    if not chars:
        # Nothing to escape: propagate the original error rather than
        # returning an empty replacement.
        raise error
    escaped = [__builtin__.unichr(0xdc00 + ord(char)) for char in chars]
    return u"".join(escaped), error.end
def _read_string(self):
    """Read one double-quoted string literal from the text buffer.

    Handles the escapes quote, slash, backslash, b, f, n, r, t, and u
    followed by four hexadecimal digits.

    Returns:
        A ``(source, value)`` tuple: the raw text consumed (quotes and
        escape sequences included) and the decoded string content.

    Raises:
        UnexpectedCharacter: If a backslash is followed by a character that
            is not a recognised escape.
        ValueError: From ``int`` when a ``u`` escape contains a non-hex digit.
        AwaitingData: Re-raised after ``undo()`` is called on the buffer
            (presumably rewinding to the position saved by ``mark()``).
    """
    # Single-letter escapes that decode to one control character.
    control_escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    self.__text.mark()
    try:
        raw_parts = [self._read_literal('"')]
        decoded_parts = []
        while True:
            piece = self.__text.read_until_any(('"', '\\'))
            raw_parts.append(piece)
            # The last character of the piece is the terminator (quote or
            # backslash), which is not part of the decoded value.
            decoded_parts.append(piece[:-1])
            if not piece.endswith('\\'):
                # Stopped on the closing quote: the literal is complete.
                return "".join(raw_parts), "".join(decoded_parts)
            escape = self.__text.read()
            raw_parts.append(escape)
            if escape in '"/\\':
                decoded_parts.append(escape)
            elif escape in control_escapes:
                decoded_parts.append(control_escapes[escape])
            elif escape == 'u':
                # Accumulate four hex digits into a single code point.
                code_point = 0
                for _ in range(4):
                    digit = self.__text.read()
                    raw_parts.append(digit)
                    code_point = 16 * code_point + int(digit, 16)
                decoded_parts.append(_chr(code_point))
            else:
                raise UnexpectedCharacter(escape)
    except AwaitingData:
        self.__text.undo()
        raise
def _read_string(self):
    """Read one double-quoted string literal from the text buffer.

    Handles the escapes quote, slash, backslash, b, f, n, r, t, and u
    followed by four hexadecimal digits.

    Returns:
        A ``(source, value)`` tuple: the raw text consumed (quotes and
        escape sequences included) and the decoded string content.

    Raises:
        UnexpectedCharacter: If a backslash is followed by a character that
            is not a recognised escape.
        ValueError: From ``int`` when a ``u`` escape contains a non-hex digit.
        AwaitingData: Re-raised after ``undo()`` is called on the buffer
            (presumably rewinding to the position saved by ``mark()``).
    """
    # Single-letter escapes that decode to one control character.
    control_escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    self.__text.mark()
    try:
        raw_parts = [self._read_literal('"')]
        decoded_parts = []
        while True:
            piece = self.__text.read_until_any(('"', '\\'))
            raw_parts.append(piece)
            # The last character of the piece is the terminator (quote or
            # backslash), which is not part of the decoded value.
            decoded_parts.append(piece[:-1])
            if not piece.endswith('\\'):
                # Stopped on the closing quote: the literal is complete.
                return "".join(raw_parts), "".join(decoded_parts)
            escape = self.__text.read()
            raw_parts.append(escape)
            if escape in '"/\\':
                decoded_parts.append(escape)
            elif escape in control_escapes:
                decoded_parts.append(control_escapes[escape])
            elif escape == 'u':
                # Accumulate four hex digits into a single code point.
                code_point = 0
                for _ in range(4):
                    digit = self.__text.read()
                    raw_parts.append(digit)
                    code_point = 16 * code_point + int(digit, 16)
                decoded_parts.append(_chr(code_point))
            else:
                raise UnexpectedCharacter(escape)
    except AwaitingData:
        self.__text.undo()
        raise
def unichr(ch):
    """Return the text for code point ``ch``, even above ``sys.maxunicode``.

    Code points that fit within ``sys.maxunicode`` are passed straight to
    ``_unichr`` (presumably the interpreter's own ``unichr``/``chr``; it is
    defined elsewhere in this file).  Larger code points are split into a
    UTF-16 surrogate pair, each half built with ``_unichr``.

    Args:
        ch: Integer Unicode code point.

    Returns:
        A one-character string, or a two-character surrogate pair when
        ``ch`` exceeds ``sys.maxunicode``.

    Raises:
        ValueError: If ``ch`` is above 0x10FFFF, the last Unicode code point.
    """
    # Beyond U+10FFFF the arithmetic below would yield a "high" half outside
    # D800-DBFF, i.e. a silently corrupt pair; fail loudly instead.
    if ch > 0x10FFFF:
        raise ValueError("unichr() arg not in range(0x110000)")
    if ch <= sys.maxunicode:
        return _unichr(ch)
    offset = ch - 0x10000
    high = 0xD800 + (offset >> 10)    # top 10 bits of the offset
    low = 0xDC00 + (offset & 0x3FF)   # bottom 10 bits of the offset
    return _unichr(high) + _unichr(low)
def runTest(self):
    """Exercise the upcase/downcase parse actions on Unicode text and named results.

    Builds a ``Word`` over every alphabetic code point, runs it with
    ``upcaseTokens``/``downcaseTokens``, then checks that case-converting
    parse actions on a caseless ``Keyword`` are reflected in its named
    result.  Unless ``IRON_PYTHON_ENV`` is set, it also builds an
    HTML-cell grammar (construction only; nothing is scanned).
    """
    import cPyparsing as pp
    import sys
    if PY3:
        unichr = chr
    else:
        from __builtin__ import unichr

    a = '\u00bfC\u00f3mo esta usted?'
    if not JYTHON_ENV:
        ualphas = "".join(unichr(i) for i in range(sys.maxunicode)
                          if unichr(i).isalpha())
    else:
        # This branch leaves out 0xD800-0xDFFF, the surrogate range.
        ualphas = "".join(unichr(i) for i in list(range(0xd800)) + list(range(0xe000, sys.maxunicode))
                          if unichr(i).isalpha())

    # No-op stand-in for print so the test stays silent; named print_ so the
    # built-in is not shadowed (and the assignment is valid on Python 2).
    print_ = lambda *args: None

    uword = pp.Word(ualphas).setParseAction(pp.upcaseTokens)
    print_(uword.searchString(a))

    uword = pp.Word(ualphas).setParseAction(pp.downcaseTokens)
    print_(uword.searchString(a))

    kw = pp.Keyword('mykey', caseless=True).setParseAction(pp.upcaseTokens).setResultsName('rname')
    ret = kw.parseString('mykey')
    print_(ret.rname)
    assert ret.rname == 'MYKEY', "failed to upcase with named result"

    kw = pp.Keyword('mykey', caseless=True).setParseAction(pp.pyparsing_common.upcaseTokens).setResultsName('rname')
    ret = kw.parseString('mykey')
    print_(ret.rname)
    assert ret.rname == 'MYKEY', "failed to upcase with named result (pyparsing_common)"

    kw = pp.Keyword('MYKEY', caseless=True).setParseAction(pp.pyparsing_common.downcaseTokens).setResultsName('rname')
    ret = kw.parseString('mykey')
    print_(ret.rname)
    # This case checks downcasing, so the failure message says so.
    assert ret.rname == 'mykey', "failed to downcase with named result"

    if not IRON_PYTHON_ENV:
        # Test HTML data.
        # NOTE(review): the runs of '?' look like non-ASCII text lost to an
        # encoding problem; confirm against the upstream test data.
        html = ("<TR class=maintxt bgColor=#ffffff> "
                "<TD vAlign=top>?????????????, ??????</TD> "
                "<TD vAlign=top><STRONG>BenQ-Siemens CF61</STRONG></TD> ")

        # Meant to read "Manufacturer, model".
        text_manuf = '?????????????, ??????'
        manufacturer = pp.Literal(text_manuf)

        td_start, td_end = pp.makeHTMLTags("td")
        manuf_body = td_start.suppress() + manufacturer + pp.SkipTo(td_end)("cells*") + td_end.suppress()

        #~ manuf_body.setDebug()
        #~ for tokens in manuf_body.scanString(html):
        #~     print_(tokens)