def __init__(self, file, encoding=None):
"""
@param file A file-like object holding your input. Only the read()
method must be implemented.
@param encoding If you set the optional encoding argument, then the
data will be decoded on the fly.
"""
if encoding is not None:
# wrap input in a decoding reader
reader = codecs.lookup(encoding)[2]
file = reader(file)
data = file.read()
ANTLRStringStream.__init__(self, data)
# I guess the ANTLR prefix exists only to avoid a name clash with some Java
# mumbojumbo. A plain "StringStream" looks better to me, which should be
# the preferred name in Python.
python类lookup()的实例源码
def _populate_class_variables():
lookup = {}
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# ", unless it happens within an attribute value, which
# is handled elsewhere.
characters_for_re.append(character)
lookup[character] = name
# But we do want to turn " into the quotation mark.
reverse_lookup[name] = character
re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
def __init__(self, file, encoding=None):
"""
@param file A file-like object holding your input. Only the read()
method must be implemented.
@param encoding If you set the optional encoding argument, then the
data will be decoded on the fly.
"""
if encoding is not None:
# wrap input in a decoding reader
reader = codecs.lookup(encoding)[2]
file = reader(file)
data = file.read()
ANTLRStringStream.__init__(self, data)
# I guess the ANTLR prefix exists only to avoid a name clash with some Java
# mumbojumbo. A plain "StringStream" looks better to me, which should be
# the preferred name in Python.
def __init__(self, file, encoding=None):
"""
@param file A file-like object holding your input. Only the read()
method must be implemented.
@param encoding If you set the optional encoding argument, then the
data will be decoded on the fly.
"""
if encoding is not None:
# wrap input in a decoding reader
reader = codecs.lookup(encoding)[2]
file = reader(file)
data = file.read()
ANTLRStringStream.__init__(self, data)
# I guess the ANTLR prefix exists only to avoid a name clash with some Java
# mumbojumbo. A plain "StringStream" looks better to me, which should be
# the preferred name in Python.
def _get_encoding(encoding_or_label):
"""
Accept either an encoding object or label.
:param encoding: An :class:`Encoding` object or a label string.
:returns: An :class:`Encoding` object.
:raises: :exc:`~exceptions.LookupError` for an unknown label.
"""
if hasattr(encoding_or_label, 'codec_info'):
return encoding_or_label
encoding = lookup(encoding_or_label)
if encoding is None:
raise LookupError('Unknown encoding label: %r' % encoding_or_label)
return encoding
def _populate_class_variables():
lookup = {}
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# ", unless it happens within an attribute value, which
# is handled elsewhere.
characters_for_re.append(character)
lookup[character] = name
# But we do want to turn " into the quotation mark.
reverse_lookup[name] = character
re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
def aliasmbcs():
"""On Windows, some default encodings are not provided by Python,
while they are always available as "mbcs" in each locale. Make
them usable by aliasing to "mbcs" in such a case."""
if sys.platform == 'win32':
import locale, codecs
enc = locale.getdefaultlocale()[1]
if enc.startswith('cp'): # "cp***" ?
try:
codecs.lookup(enc)
except LookupError:
import encodings
encodings._cache[enc] = encodings._unknown
encodings.aliases.aliases[enc] = 'mbcs'
def lookup(label):
"""
Look for an encoding by its label.
This is the spec’s `get an encoding
<http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
Supported labels are listed there.
:param label: A string.
:returns:
An :class:`Encoding` object, or :obj:`None` for an unknown label.
"""
# Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
label = ascii_lower(label.strip('\t\n\f\r '))
name = LABELS.get(label)
if name is None:
return None
encoding = CACHE.get(name)
if encoding is None:
if name == 'x-user-defined':
from .x_user_defined import codec_info
else:
python_name = PYTHON_NAMES.get(name, name)
# Any python_name value that gets to here should be valid.
codec_info = codecs.lookup(python_name)
encoding = Encoding(name, codec_info)
CACHE[name] = encoding
return encoding
def aliasmbcs():
"""On Windows, some default encodings are not provided by Python,
while they are always available as "mbcs" in each locale. Make
them usable by aliasing to "mbcs" in such a case."""
if sys.platform == 'win32':
import locale, codecs
enc = locale.getdefaultlocale()[1]
if enc.startswith('cp'): # "cp***" ?
try:
codecs.lookup(enc)
except LookupError:
import encodings
encodings._cache[enc] = encodings._unknown
encodings.aliases.aliases[enc] = 'mbcs'
def lookup(label):
"""
Look for an encoding by its label.
This is the spec’s `get an encoding
<http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
Supported labels are listed there.
:param label: A string.
:returns:
An :class:`Encoding` object, or :obj:`None` for an unknown label.
"""
# Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
label = ascii_lower(label.strip('\t\n\f\r '))
name = LABELS.get(label)
if name is None:
return None
encoding = CACHE.get(name)
if encoding is None:
if name == 'x-user-defined':
from .x_user_defined import codec_info
else:
python_name = PYTHON_NAMES.get(name, name)
# Any python_name value that gets to here should be valid.
codec_info = codecs.lookup(python_name)
encoding = Encoding(name, codec_info)
CACHE[name] = encoding
return encoding
def aliasmbcs():
"""On Windows, some default encodings are not provided by Python,
while they are always available as "mbcs" in each locale. Make
them usable by aliasing to "mbcs" in such a case."""
if sys.platform == 'win32':
import locale, codecs
enc = locale.getdefaultlocale()[1]
if enc.startswith('cp'): # "cp***" ?
try:
codecs.lookup(enc)
except LookupError:
import encodings
encodings._cache[enc] = encodings._unknown
encodings.aliases.aliases[enc] = 'mbcs'
def __init__(self, input_charset=DEFAULT_CHARSET):
# RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
# unicode because its .lower() is locale insensitive. If the argument
# is already a unicode, we leave it at that, but ensure that the
# charset is ASCII, as the standard (RFC XXX) requires.
try:
if isinstance(input_charset, unicode):
input_charset.encode('ascii')
else:
input_charset = unicode(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
input_charset = input_charset.lower().encode('ascii')
# Set the input charset after filtering through the aliases and/or codecs
if not (input_charset in ALIASES or input_charset in CHARSETS):
try:
input_charset = codecs.lookup(input_charset).name
except LookupError:
pass
self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the
# charset_map dictionary. Try that first, but let the user override
# it.
henc, benc, conv = CHARSETS.get(self.input_charset,
(SHORTEST, BASE64, None))
if not conv:
conv = self.input_charset
# Set the attributes, allowing the arguments to override the default.
self.header_encoding = henc
self.body_encoding = benc
self.output_charset = ALIASES.get(conv, conv)
# Now set the codecs. If one isn't defined for input_charset,
# guess and try a Unicode codec with the same name as input_codec.
self.input_codec = CODEC_MAP.get(self.input_charset,
self.input_charset)
self.output_codec = CODEC_MAP.get(self.output_charset,
self.output_charset)
def toprettyxml(self, indent="\t", newl="\n", encoding = None):
# indent = the indentation string to prepend, per level
# newl = the newline string to append
writer = _get_StringIO()
if encoding is not None:
import codecs
# Can't use codecs.getwriter to preserve 2.0 compatibility
writer = codecs.lookup(encoding)[3](writer)
if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding)
else:
self.writexml(writer, "", indent, newl)
return writer.getvalue()
def codec_search(name):
if name == 'cp65001':
return codecs.lookup('utf-8')
return None
def _codec(self, charset):
if not charset: return charset
codec = None
try:
codecs.lookup(charset)
codec = charset
except (LookupError, ValueError):
pass
return codec
def get_eci_assignment_number(encoding):
"""\
Returns the ECI number for the provided encoding.
:param str encoding: A encoding name
:return str: The ECI number.
"""
try:
return consts.ECI_ASSIGNMENT_NUM[codecs.lookup(encoding).name]
except KeyError:
raise QRCodeError('Unknown ECI assignment number for encoding "{0}".'
.format(encoding))
def _codec(self, charset):
if not charset:
return charset
codec = None
try:
codecs.lookup(charset)
codec = charset
except (LookupError, ValueError):
pass
return codec
def _codec(self, charset):
if not charset:
return charset
codec = None
try:
codecs.lookup(charset)
codec = charset
except (LookupError, ValueError):
pass
return codec
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
def __getitem__(self, key):
"""Besides index lookup (getting item n) you can also pass it a string
to get the quality for the item. If the item is not in the list, the
returned quality is ``0``.
"""
if isinstance(key, string_types):
return self.quality(key)
return list.__getitem__(self, key)
def quality(self, key):
"""Returns the quality of the key.
.. versionadded:: 0.6
In previous versions you had to use the item-lookup syntax
(eg: ``obj[key]`` instead of ``obj.quality(key)``)
"""
for item, quality in self:
if self._value_matches(key, item):
return quality
return 0