def _parseFile(self, file):
    """Feed *file* to a new expat parser in ``BUFSIZE``-sized reads.

    Element and character-data events are routed to this object's
    ``_startElementHandler``, ``_endElementHandler`` and
    ``_characterDataHandler``.  When ``self.progress`` is truthy, its
    ``set`` method receives the running total of data read, floor-divided
    by 100, after every chunk.
    """
    from xml.parsers.expat import ParserCreate

    expat_parser = ParserCreate()
    expat_parser.StartElementHandler = self._startElementHandler
    expat_parser.EndElementHandler = self._endElementHandler
    expat_parser.CharacterDataHandler = self._characterDataHandler

    consumed = 0
    chunk = file.read(BUFSIZE)
    while chunk:
        consumed += len(chunk)
        if self.progress:
            self.progress.set(consumed // 100)
        expat_parser.Parse(chunk, 0)
        chunk = file.read(BUFSIZE)
    # An empty read marks end of input: hand it over with the final flag
    # so expat finishes the document.
    expat_parser.Parse(chunk, 1)
# Example source code for Python's ParserCreate()
def __init__(self):
    """
    Load the LDS temple code/name tables from the lds.xml data file.

    The lookup containers start out empty and the file is parsed with
    expat using this object's private start/end/character handlers.
    Any exception raised while parsing or opening the file is logged
    through LOG.error instead of being propagated.
    """
    self.__temple_codes = {}
    self.__temple_to_abrev = {}
    self.__current_temple = ""
    self.__tlist = []

    data_file = os.path.join(DATA_DIR, "lds.xml")
    lds_path = os.path.expanduser(data_file)
    try:
        xml_parser = ParserCreate()
        xml_parser.StartElementHandler = self.__start_element
        xml_parser.EndElementHandler = self.__end_element
        xml_parser.CharacterDataHandler = self.__characters
        with open(lds_path, 'rb') as lds_file:
            xml_parser.ParseFile(lds_file)
    except Exception as err:
        LOG.error(str(err))
def parse(self, filename):
    """Parse the XML file at *filename* with expat and return ``self.root``.

    Start- and end-element events are dispatched to ``self.start_element``
    and ``self.end_element``; the file is read in binary mode.
    """
    xml_parser = expat.ParserCreate()
    # Event handlers
    for event_name, callback in (
        ("StartElementHandler", self.start_element),
        ("EndElementHandler", self.end_element),
    ):
        setattr(xml_parser, event_name, callback)
    # Parse the XML file
    with open(filename, 'rb') as source:
        xml_parser.ParseFile(source)
    return self.root
#------------------------------------------------------------------------
#
# _Holidays
#
#------------------------------------------------------------------------
def _fetchGlyphName(glyphPath):
    """Return the glyph name stored in the .glif file at *glyphPath*.

    The file is parsed with expat using the module-level
    ``_startElementHandler``.  The name is taken from the first argument
    of the ``_DoneParsing`` exception that interrupts the parse
    (presumably raised by that handler -- it is defined outside this
    block).

    Raises:
        ValueError: if the ``_DoneParsing`` payload is None, reported as
            a missing <glyph> top-level element.
        AssertionError: if parsing completes without ``_DoneParsing``
            being raised.
    """
    from xml.parsers.expat import ParserCreate

    p = ParserCreate()
    p.StartElementHandler = _startElementHandler
    try:
        p.returns_unicode = True
    except AttributeError:
        # Python 3 parsers have no such attribute; they always pass str
        # to the handlers.
        pass
    # Binary mode works with ParseFile on both Python 2 and 3, and the
    # context manager closes the handle even though the parse is normally
    # ended by an exception.
    with open(glyphPath, "rb") as f:
        try:
            p.ParseFile(f)
        except _DoneParsing as why:
            glyphName = why.args[0]
            if glyphName is None:
                raise ValueError(".glif file doen't have a <glyph> top-level "
                                 "element: %r" % glyphPath)
        else:
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under -O (which would leave glyphName unbound).
            raise AssertionError(
                "it's not expected that parsing the file ends normally")
    return glyphName
def test_issue9402(self):
    """Use an external-entity sub-parser while buffer_text is enabled.

    The handler creates a sub-parser with ``ExternalEntityParserCreate``
    and parses an empty string with it; the test requires that this
    ``Parse`` call returned 1.
    """
    # create an ExternalEntityParserCreate with buffer text
    class ExternalOutputter:
        def __init__(self, parser):
            self.parser = parser
            self.parser_result = None

        def ExternalEntityRefHandler(self, context, base, sysId, pubId):
            sub_parser = self.parser.ExternalEntityParserCreate("")
            self.parser_result = sub_parser.Parse("", 1)
            return 1

    buffered_parser = expat.ParserCreate(namespace_separator='!')
    buffered_parser.buffer_text = 1
    outputter = ExternalOutputter(buffered_parser)
    buffered_parser.ExternalEntityRefHandler = outputter.ExternalEntityRefHandler
    buffered_parser.Parse(data, 1)
    self.assertEqual(outputter.parser_result, 1)
def test_parse_only_xml_data(self):
    """An exception raised by the character-data handler leaves ``Parse``."""
    # http://python.org/sf/1296433
    #
    payload = 'a' * 1025
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % payload
    # By contrast, this document doesn't crash:
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def raising_handler(text):
        raise SpecificException

    xml_parser = expat.ParserCreate()
    xml_parser.CharacterDataHandler = raising_handler

    self.assertRaises(Exception, xml_parser.Parse, xml)
def test_unchanged_size(self):
    """Re-assigning buffer_size to its current value leaves ``self.n`` alone.

    ``self.n`` is expected to be 1 after the first chunk, still 1 after
    the no-op buffer_size assignment, and 2 after the rest is parsed.
    """
    head = "<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)
    tail = 'a' * 512 + '</s>'

    buffered_parser = expat.ParserCreate()
    buffered_parser.CharacterDataHandler = self.counting_handler
    buffered_parser.buffer_size = 512
    buffered_parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    buffered_parser.Parse(head)
    self.assertEqual(self.n, 1)

    # Reassign to buffer_size, but assign the same size.
    same_size = buffered_parser.buffer_size
    buffered_parser.buffer_size = same_size
    self.assertEqual(self.n, 1)

    # Try parsing rest of the document
    buffered_parser.Parse(tail)
    self.assertEqual(self.n, 2)
def test_use_foreign_dtd(self):
    """
    With UseForeignDTD(True), parsing a document that has no external
    entity reference must call ExternalEntityRefHandler exactly once,
    with None as both the public and the system id.
    """
    recorded_ids = []

    def record_ids(context, base, system_id, public_id):
        recorded_ids.append((public_id, system_id))
        return 1

    dtd_parser = expat.ParserCreate()
    dtd_parser.UseForeignDTD(True)
    dtd_parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    dtd_parser.ExternalEntityRefHandler = record_ids

    document = "<?xml version='1.0'?><element/>"
    dtd_parser.Parse(document)
    self.assertEqual(recorded_ids, [(None, None)])
def test_parse_only_xml_data(self):
    """A raising character-data handler must make ``Parse`` raise."""
    # http://python.org/sf/1296433
    #
    body = 'a' * 1025
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % body
    # By contrast, this document doesn't crash:
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def failing_handler(text):
        raise SpecificException

    doc_parser = expat.ParserCreate()
    doc_parser.CharacterDataHandler = failing_handler

    self.assertRaises(Exception, doc_parser.Parse, xml)
def test_unchanged_size(self):
    """Setting buffer_size to the value it already has changes nothing.

    The counter ``self.n`` must read 1 after the first chunk, 1 after
    the same-size assignment, and 2 once the remainder is parsed.
    """
    first_part = "<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)
    second_part = 'a' * 512 + '</s>'

    text_parser = expat.ParserCreate()
    text_parser.CharacterDataHandler = self.counting_handler
    text_parser.buffer_size = 512
    text_parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    text_parser.Parse(first_part)
    self.assertEqual(self.n, 1)

    # Reassign to buffer_size, but assign the same size.
    current_size = text_parser.buffer_size
    text_parser.buffer_size = current_size
    self.assertEqual(self.n, 1)

    # Try parsing rest of the document
    text_parser.Parse(second_part)
    self.assertEqual(self.n, 2)
def test_parse_only_xml_data(self):
    """``Parse`` must raise when the character-data handler raises."""
    # http://python.org/sf/1296433
    #
    filler = 'a' * 1025
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % filler
    # By contrast, this document doesn't crash:
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def exploding_handler(text):
        raise SpecificException

    expat_parser = expat.ParserCreate()
    expat_parser.CharacterDataHandler = exploding_handler

    self.assertRaises(Exception, expat_parser.Parse, xml)
def test_unchanged_size(self):
    """A no-op buffer_size assignment must not alter the handler count.

    ``self.n`` is asserted to be 1, then 1 again after the assignment,
    then 2 after the second chunk.
    """
    opening = "<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)
    closing = 'a' * 512 + '</s>'

    sized_parser = expat.ParserCreate()
    sized_parser.CharacterDataHandler = self.counting_handler
    sized_parser.buffer_size = 512
    sized_parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    sized_parser.Parse(opening)
    self.assertEqual(self.n, 1)

    # Reassign to buffer_size, but assign the same size.
    unchanged = sized_parser.buffer_size
    sized_parser.buffer_size = unchanged
    self.assertEqual(self.n, 1)

    # Try parsing rest of the document
    sized_parser.Parse(closing)
    self.assertEqual(self.n, 2)
def test_ignore_use_foreign_dtd(self):
    """
    With UseForeignDTD(True), parsing a document that does carry an
    external entity reference must call ExternalEntityRefHandler with
    the public and system ids given in that document.
    """
    recorded_ids = []

    def record_ids(context, base, system_id, public_id):
        recorded_ids.append((public_id, system_id))
        return 1

    dtd_parser = expat.ParserCreate()
    dtd_parser.UseForeignDTD(True)
    dtd_parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    dtd_parser.ExternalEntityRefHandler = record_ids

    document = (
        "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
    dtd_parser.Parse(document)
    self.assertEqual(recorded_ids, [("bar", "baz")])
def test_issue9402(self):
    """Use an external-entity sub-parser while buffer_text is enabled.

    The handler builds a sub-parser via ``ExternalEntityParserCreate``
    and parses empty bytes with it; that ``Parse`` call must return 1.
    """
    # create an ExternalEntityParserCreate with buffer text
    class ExternalOutputter:
        def __init__(self, parser):
            self.parser = parser
            self.parser_result = None

        def ExternalEntityRefHandler(self, context, base, sysId, pubId):
            child_parser = self.parser.ExternalEntityParserCreate("")
            self.parser_result = child_parser.Parse(b"", 1)
            return 1

    main_parser = expat.ParserCreate(namespace_separator='!')
    main_parser.buffer_text = 1
    recorder = ExternalOutputter(main_parser)
    main_parser.ExternalEntityRefHandler = recorder.ExternalEntityRefHandler
    main_parser.Parse(data, 1)
    self.assertEqual(recorder.parser_result, 1)
def test_parse_only_xml_data(self):
    """A raising character-data handler must make ``Parse`` raise.

    The document is passed as bytes encoded with the 'iso8859' codec.
    """
    # http://python.org/sf/1296433
    #
    filler = 'a' * 1025
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % filler
    # By contrast, this document doesn't crash:
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def raising_handler(text):
        raise SpecificException

    bytes_parser = expat.ParserCreate()
    bytes_parser.CharacterDataHandler = raising_handler

    encoded = xml.encode('iso8859')
    self.assertRaises(Exception, bytes_parser.Parse, encoded)
def test_unchanged_size(self):
    """A same-value buffer_size assignment must not alter ``self.n``.

    The document is fed as two bytes chunks; ``self.n`` must be 1 after
    the first, still 1 after the assignment, and 2 after the second.
    """
    declaration = b"<?xml version='1.0' encoding='iso8859'?><s>"
    head = declaration + b'a' * 512
    tail = b'a' * 512 + b'</s>'

    buffered_parser = expat.ParserCreate()
    buffered_parser.CharacterDataHandler = self.counting_handler
    buffered_parser.buffer_size = 512
    buffered_parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    buffered_parser.Parse(head)
    self.assertEqual(self.n, 1)

    # Reassign to buffer_size, but assign the same size.
    same_size = buffered_parser.buffer_size
    buffered_parser.buffer_size = same_size
    self.assertEqual(self.n, 1)

    # Try parsing rest of the document
    buffered_parser.Parse(tail)
    self.assertEqual(self.n, 2)
def test_ignore_use_foreign_dtd(self):
    """
    With UseForeignDTD(True), parsing a bytes document that carries an
    external entity reference must call ExternalEntityRefHandler with
    the public and system ids given in that document.
    """
    seen_ids = []

    def remember_ids(context, base, system_id, public_id):
        seen_ids.append((public_id, system_id))
        return 1

    foreign_parser = expat.ParserCreate()
    foreign_parser.UseForeignDTD(True)
    foreign_parser.SetParamEntityParsing(
        expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    foreign_parser.ExternalEntityRefHandler = remember_ids

    document = (
        b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
    foreign_parser.Parse(document)
    self.assertEqual(seen_ids, [("bar", "baz")])
def LoadTree(self, filename):
    """Rebuild the tree from the XML file at *filename*.

    All existing items are deleted, a placeholder root labelled
    "Root-No-Meaning" is added, and ``nodeStack`` / ``ordeDic`` are
    reset.  The file is then parsed with expat, dispatching to this
    object's ``StartElement``, ``EndElement`` and ``CharacterData``.
    """
    # Start from an empty tree with a single placeholder root.
    self.DeleteAllItems()
    self._root = self.AddRoot("Root-No-Meaning")
    self.nodeStack = [self._root]
    self.ordeDic = {}

    # Create a parser and tell it what the element handlers are.
    parser = expat.ParserCreate()
    parser.StartElementHandler = self.StartElement
    parser.EndElementHandler = self.EndElement
    parser.CharacterDataHandler = self.CharacterData

    # Parse the XML file.  The context manager closes the handle as soon
    # as the content has been read instead of leaving it to the garbage
    # collector.
    with open(filename, 'rb') as xml_file:
        parser.Parse(xml_file.read(), 1)
def test_parse_only_xml_data(self):
    """An exception from the character-data handler must escape ``Parse``."""
    # http://python.org/sf/1296433
    #
    text_run = 'a' * 1025
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % text_run
    # By contrast, this document doesn't crash:
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def raise_specific(text):
        raise SpecificException

    data_parser = expat.ParserCreate()
    data_parser.CharacterDataHandler = raise_specific

    self.assertRaises(Exception, data_parser.Parse, xml)
def test_unchanged_size(self):
    """Assigning buffer_size its own value must leave ``self.n`` unchanged.

    Expected values of ``self.n``: 1 after the first chunk, 1 after the
    assignment, 2 after the final chunk.
    """
    chunk_one = "<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)
    chunk_two = 'a' * 512 + '</s>'

    chunk_parser = expat.ParserCreate()
    chunk_parser.CharacterDataHandler = self.counting_handler
    chunk_parser.buffer_size = 512
    chunk_parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    chunk_parser.Parse(chunk_one)
    self.assertEqual(self.n, 1)

    # Reassign to buffer_size, but assign the same size.
    existing_size = chunk_parser.buffer_size
    chunk_parser.buffer_size = existing_size
    self.assertEqual(self.n, 1)

    # Try parsing rest of the document
    chunk_parser.Parse(chunk_two)
    self.assertEqual(self.n, 2)
def test_issue9402(self):
    """Use an external-entity sub-parser while buffer_text is enabled.

    The handler obtains a sub-parser from ``ExternalEntityParserCreate``
    and parses empty bytes with it; the recorded result must equal 1.
    """
    # create an ExternalEntityParserCreate with buffer text
    class ExternalOutputter:
        def __init__(self, parser):
            self.parser = parser
            self.parser_result = None

        def ExternalEntityRefHandler(self, context, base, sysId, pubId):
            entity_parser = self.parser.ExternalEntityParserCreate("")
            self.parser_result = entity_parser.Parse(b"", 1)
            return 1

    outer_parser = expat.ParserCreate(namespace_separator='!')
    outer_parser.buffer_text = 1
    collector = ExternalOutputter(outer_parser)
    outer_parser.ExternalEntityRefHandler = collector.ExternalEntityRefHandler
    outer_parser.Parse(data, 1)
    self.assertEqual(collector.parser_result, 1)
def test_exception(self):
    """Check the exception and traceback produced when a handler raises.

    NOTE(review): ``self.StartElementHandler`` and
    ``self.check_traceback_entry`` are defined outside this block; the
    assertions below imply the handler raises RuntimeError with the
    element name as its first argument -- confirm against the class.
    """
    parser = expat.ParserCreate()
    parser.StartElementHandler = self.StartElementHandler
    try:
        parser.Parse(b"<a><b><c/></b></a>", 1)
        # Reaching this line means no exception came out of Parse.
        self.fail()
    except RuntimeError as e:
        self.assertEqual(e.args[0], 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % e.args[0])
        # Check that the traceback contains the relevant line in pyexpat.c
        entries = traceback.extract_tb(e.__traceback__)
        # Expected frames, outermost first: this test method, the
        # StartElement frame attributed to pyexpat.c, then the Python
        # handler itself.
        self.assertEqual(len(entries), 3)
        self.check_traceback_entry(entries[0],
                                   "test_pyexpat.py", "test_exception")
        self.check_traceback_entry(entries[1],
                                   "pyexpat.c", "StartElement")
        self.check_traceback_entry(entries[2],
                                   "test_pyexpat.py", "StartElementHandler")
        # The source text of the C frame is only checked when running
        # from a Python build tree.
        if sysconfig.is_python_build():
            self.assertIn('call_with_frame("StartElement"', entries[1][3])
# Test Current* members:
def parse(self, fileobj):
    """Parse the XML read from *fileobj* and return ``self.root``.

    Element begin/end events and character data are forwarded to
    ``handleBeginElement``, ``handleEndElement`` and ``handleData``.
    """
    from xml.parsers.expat import ParserCreate

    xml_parser = ParserCreate()
    callbacks = (
        ("StartElementHandler", self.handleBeginElement),
        ("EndElementHandler", self.handleEndElement),
        ("CharacterDataHandler", self.handleData),
    )
    for event_name, callback in callbacks:
        setattr(xml_parser, event_name, callback)
    xml_parser.ParseFile(fileobj)
    return self.root
def __init__(self, target):
    """Create an expat parser that forwards its events to *target*.

    ``target.start``, ``target.end`` and ``target.data`` are installed
    as the start-element, end-element and character-data handlers.
    Finally ``target.xml(encoding, None)`` is called, where *encoding*
    is "utf-8" if the parser reports that it does not return unicode,
    and None otherwise.
    """
    self._parser = parser = expat.ParserCreate(None, None)
    self._target = target
    parser.StartElementHandler = target.start
    parser.EndElementHandler = target.end
    parser.CharacterDataHandler = target.data
    encoding = None
    # ``returns_unicode`` exists only on Python 2 parsers.  Python 3
    # parsers always deliver str, so a missing attribute is treated as
    # true instead of raising AttributeError.
    if not getattr(parser, "returns_unicode", True):
        encoding = "utf-8"
    target.xml(encoding, None)
def __init__(self):
    """Create the expat parser wired to this object and reset its state.

    ``startElement_``, ``endElement_`` and ``addCharacterData_`` become
    the parser's handlers; ``root`` starts as None and ``stack`` empty.
    """
    from xml.parsers.expat import ParserCreate

    self.parser = xml_parser = ParserCreate()
    xml_parser.StartElementHandler = self.startElement_
    xml_parser.EndElementHandler = self.endElement_
    xml_parser.CharacterDataHandler = self.addCharacterData_
    self.root, self.stack = None, []
def parse(self, text):
    """Run *text* through expat, reporting element starts and ends.

    Only ``startElementHandler`` and ``endElementHandler`` are installed,
    and ``Parse`` is called without the final-chunk flag.
    """
    from xml.parsers.expat import ParserCreate

    xml_parser = ParserCreate()
    callbacks = (
        ("StartElementHandler", self.startElementHandler),
        ("EndElementHandler", self.endElementHandler),
    )
    for event_name, callback in callbacks:
        setattr(xml_parser, event_name, callback)
    xml_parser.Parse(text)
def __init__(self, data = None):
    """Create the expat parser and optionally parse *data* right away.

    ``self.start``, ``self.end`` and ``self.data`` are installed as the
    parser's handlers and ``result`` is reset to None.  When *data* is
    truthy it is passed to ``self.feed`` and ``self.close`` is called.
    """
    xml_parser = self._parser = ParserCreate()
    xml_parser.StartElementHandler = self.start
    xml_parser.EndElementHandler = self.end
    xml_parser.CharacterDataHandler = self.data
    self.result = None
    if not data:
        return
    self.feed(data)
    self.close()
def Parse(self,filename):
    """Parse the XML file at *filename* with expat and return ``self.root``.

    Events are dispatched to this object's ``StartElement``,
    ``EndElement`` and ``CharacterData`` methods.
    """
    # Create the parser and register the event handlers
    parser = expat.ParserCreate()
    parser.StartElementHandler = self.StartElement
    parser.EndElementHandler = self.EndElement
    parser.CharacterDataHandler = self.CharacterData
    # Parse the XML file.  It is read as bytes so that expat applies the
    # encoding declared in the document itself, and the context manager
    # closes the handle once the content has been read.
    with open(filename, 'rb') as xml_file:
        parser.Parse(xml_file.read(), 1)
    return self.root
def parse(self, fileobj):
    """Parse XML from the file object *fileobj* and return ``self.root``.

    The parser reports element starts, element ends and character data
    to ``handleBeginElement``, ``handleEndElement`` and ``handleData``.
    """
    from xml.parsers.expat import ParserCreate

    expat_parser = ParserCreate()
    handler_table = {
        "StartElementHandler": self.handleBeginElement,
        "EndElementHandler": self.handleEndElement,
        "CharacterDataHandler": self.handleData,
    }
    for attribute in ("StartElementHandler", "EndElementHandler",
                      "CharacterDataHandler"):
        setattr(expat_parser, attribute, handler_table[attribute])
    expat_parser.ParseFile(fileobj)
    return self.root
def reset(self):
    """Replace ``self._parser`` with a new expat parser and rewire it.

    With ``self._namespaces`` set, the parser is created with " " as the
    namespace separator, ``namespace_prefixes`` is enabled and the
    ``*_ns`` element handlers are installed; otherwise the plain element
    handlers are used.  The remaining declaration, namespace and entity
    handlers are then attached, parameter-entity parsing is set to
    "unless standalone", and the parsing flag and entity stack are
    cleared.
    """
    if self._namespaces:
        parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                    intern=self._interning)
        self._parser = parser
        parser.namespace_prefixes = 1
        parser.StartElementHandler = self.start_element_ns
        parser.EndElementHandler = self.end_element_ns
    else:
        parser = expat.ParserCreate(self._source.getEncoding(),
                                    intern = self._interning)
        self._parser = parser
        parser.StartElementHandler = self.start_element
        parser.EndElementHandler = self.end_element

    self._reset_cont_handler()
    for attribute, callback in (
        ("UnparsedEntityDeclHandler", self.unparsed_entity_decl),
        ("NotationDeclHandler", self.notation_decl),
        ("StartNamespaceDeclHandler", self.start_namespace_decl),
        ("EndNamespaceDeclHandler", self.end_namespace_decl),
    ):
        setattr(parser, attribute, callback)

    self._decl_handler_prop = None
    if self._lex_handler_prop:
        self._reset_lex_handler_prop()
    # Handlers deliberately left unset:
    #   DefaultHandler, DefaultHandlerExpand, NotStandaloneHandler
    parser.ExternalEntityRefHandler = self.external_entity_ref
    try:
        parser.SkippedEntityHandler = self.skipped_entity_handler
    except AttributeError:
        # This pyexpat does not support SkippedEntity
        pass
    parser.SetParamEntityParsing(
        expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

    self._parsing = 0
    self._entity_stack = []
# Locator methods