def test_strip_attributes_ns(self):
    """Check strip_attributes() with plain, namespaced and wildcard names.

    The same document is re-parsed for every case, the listed attribute
    names are stripped from the tree, and the output of
    ``self._writeElement`` is compared with the expected serialisation.
    """
    document = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
    cases = (
        # (attribute names to strip, expected serialisation)
        (('a',),
         '<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
        (('{http://test/ns}a', 'c'),
         '<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
        (('{http://test/ns}*',),
         '<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
    )
    for names, serialised in cases:
        root = self.etree.XML(document)
        self.etree.strip_attributes(root, *names)
        self.assertEqual(_bytes(serialised), self._writeElement(root))
# Example source code for the Python class etree()
def test_strip_elements(self):
XML = self.etree.XML
xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
root = XML(xml)
self.etree.strip_elements(root, 'a')
self.assertEqual(_bytes('<test><x></x></test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_elements(root, 'c')
self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
self._writeElement(root))
def test_strip_elements_ns(self):
XML = self.etree.XML
xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
root = XML(xml)
self.etree.strip_elements(root, 'a')
self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_elements(root, '{urn:a}b', 'c')
self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_elements(root, '{urn:a}*', 'c')
self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
self._writeElement(root))
def test_strip_tags_pi_comment(self):
XML = self.etree.XML
PI = self.etree.ProcessingInstruction
Comment = self.etree.Comment
xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
root = XML(xml)
self.etree.strip_tags(root, PI)
self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->\n<?PI1?>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_tags(root, Comment)
self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT<?PI2?></test>\n<!--comment3-->\n<?PI1?>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_tags(root, PI, Comment)
self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_tags(root, Comment, PI)
self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
self._writeElement(root))
def test_strip_tags_ns(self):
XML = self.etree.XML
xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>CT</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
root = XML(xml)
self.etree.strip_tags(root, 'a')
self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>XA<b xmlns="urn:a"></b>BT<c xmlns="urn:x"></c>CTAT</x>XT</test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_tags(root, '{urn:a}b', 'c')
self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
self._writeElement(root))
root = XML(xml)
self.etree.strip_tags(root, '{urn:a}*', 'c')
self.assertEqual(_bytes('<test>TESTA<b>B<c xmlns="urn:c"></c>CT</b>BTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
self._writeElement(root))
def test_parse_remove_pis(self):
    """Check that XMLParser(remove_pis=True) drops processing instructions.

    The document is first parsed with the default parser and is expected
    to serialise back unchanged.  It is then parsed with a parser created
    with ``remove_pis=True`` and is expected to serialise to the bare
    element structure.
    """
    parse = self.etree.parse
    tostring = self.etree.tostring
    XMLParser = self.etree.XMLParser

    xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')

    tree = parse(BytesIO(xml))
    self.assertEqual(
        xml,
        tostring(tree))

    parser = XMLParser(remove_pis=True)
    # Parse from a fresh stream: re-using the object handed to the first
    # parse() would make this check depend on whether that call left the
    # read position at the start.
    tree = parse(BytesIO(xml), parser)
    self.assertEqual(
        _bytes('<a><b><c/></b></a>'),
        tostring(tree))
def test_iterparse_pis(self):
    """Check that iterparse() reports processing instructions as 'pi' events.

    Requests 'end' and 'pi' events, expects eight of them in document
    order, and expects the tree built around the root element to serialise
    back to the input, including the PIs before and after the root.
    """
    # ET removes pis
    iterparse = self.etree.iterparse
    tostring = self.etree.tostring
    ElementTree = self.etree.ElementTree

    def name(event, el):
        # PIs are identified by (target, text), elements by their tag.
        if event == 'pi':
            return (el.target, el.text)
        return el.tag

    # Encode explicitly (io.BytesIO only accepts bytes on Python 3) and
    # reuse the same bytes as the expected round-trip result below.
    xml = _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
    f = BytesIO(xml)

    events = list(iterparse(f, events=('end', 'pi')))
    # The last event is the trailing PI; the one before it is the end of
    # the root element.
    root = events[-2][1]
    self.assertEqual(8, len(events))
    self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
                      ('pid','d'), 'a', ('pie','e')],
                     [name(*item) for item in events])
    self.assertEqual(
        xml,
        tostring(ElementTree(root)))
def test_iterparse_broken_recover(self):
    """Check iterparse(recover=True) on a document with an unclosed <b>.

    Expects a root to be available after iteration and exactly one 'start'
    and one 'end' event for each of the tags a, b and c.
    """
    iterparse = self.etree.iterparse
    # Encode explicitly via _bytes(): io.BytesIO only accepts bytes on
    # Python 3.
    f = BytesIO(_bytes('<a><b><c/></a>'))

    it = iterparse(f, events=('start', 'end'), recover=True)
    events = [(ev, el.tag) for ev, el in it]
    root = it.root
    self.assertTrue(root is not None)

    for tag in ('a', 'b', 'c'):
        self.assertEqual(1, events.count(('start', tag)))
        self.assertEqual(1, events.count(('end', tag)))
def test_iterparse_broken_multi_recover(self):
    """Check iterparse(recover=True) on a document with several tag errors.

    The input contains a stray </d> and mis-ordered closing tags.  Expects
    a root to be available after iteration, one 'start'/'end' pair for a,
    and two pairs each for b and c.
    """
    iterparse = self.etree.iterparse
    # Encode explicitly via _bytes(): io.BytesIO only accepts bytes on
    # Python 3.
    f = BytesIO(_bytes('<a><b><c/></d><b><c/></a></b>'))

    it = iterparse(f, events=('start', 'end'), recover=True)
    events = [(ev, el.tag) for ev, el in it]
    root = it.root
    self.assertTrue(root is not None)

    for tag, expected_count in (('a', 1), ('b', 2), ('c', 2)):
        self.assertEqual(expected_count, events.count(('start', tag)))
        self.assertEqual(expected_count, events.count(('end', tag)))
def test_feed_parser_recover_no_id_dict(self):
    """Feed a document with an unclosed <a> to a recovering parser.

    The parser is created with ``recover=True, collect_ids=False`` and the
    input arrives in six chunks that split the XML declaration, a tag name
    and an attribute.  The resulting tree is expected to be
    root > a > othertag, with both attributes of <a> present.
    """
    # test that recover mode plays nicely with the no-id-dict setup
    parser = self.etree.XMLParser(recover=True, collect_ids=False)

    chunks = (
        '<?xml version=',
        '"1.0"?><ro',
        'ot xml:id="123"><',
        'a test="works" xml:id=',
        '"321"><othertag/></root',  # <a> not closed!
        '>',
    )
    for chunk in chunks:
        parser.feed(chunk)
    root = parser.close()

    self.assertEqual(root.tag, "root")
    self.assertEqual(len(root), 1)

    child = root[0]
    self.assertEqual(child.tag, "a")
    self.assertEqual(child.get("test"), "works")
    expected_attrib = {
        'test': 'works',
        '{http://www.w3.org/XML/1998/namespace}id': '321',
    }
    self.assertEqual(child.attrib, expected_attrib)
    self.assertEqual(len(child), 1)
    self.assertEqual(child[0].tag, "othertag")
    # FIXME: would be nice to get some errors logged ...
    #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
def test_elementtree_parser_target_type_error(self):
    """Check that ElementTree.parse() rejects a non-Element target result.

    The parser target's close() returns the string "DONE" instead of an
    Element, so ``tree.parse()`` is expected to raise TypeError.  The
    target's start() and end() callbacks are still expected to have been
    called once each, in that order.
    """
    assertEqual = self.assertEqual
    assertFalse = self.assertFalse

    events = []

    class Target(object):
        def start(self, tag, attrib):
            events.append("start")
            assertFalse(attrib)
            assertEqual("TAG", tag)

        def end(self, tag):
            events.append("end")
            assertEqual("TAG", tag)

        def close(self):
            return "DONE"  # no Element!

    parser = self.etree.XMLParser(target=Target())
    tree = self.etree.ElementTree()

    # Encode explicitly via _bytes(): io.BytesIO only accepts bytes on
    # Python 3, and a TypeError from building the stream must not be
    # confused with the one expected from tree.parse().
    source = BytesIO(_bytes("<TAG/>"))
    self.assertRaises(TypeError,
                      tree.parse, source, parser=parser)
    self.assertEqual(["start", "end"], events)
def test_parser_target_feed_no_id_dict(self):
    """Feed a document in two chunks to a target parser without ID collection.

    The target records start, end, data and comment callbacks.  The value
    returned by its close() is expected to come back from parser.close(),
    and the callbacks are expected in document order.
    """
    # test that target parsing works nicely with the no-id-hash setup
    log = []
    record = log.append

    class Target(object):
        def start(self, tag, attrib):
            record("start-" + tag)

        def end(self, tag):
            record("end-" + tag)

        def data(self, data):
            record("data-" + data)

        def comment(self, text):
            record("comment-" + text)

        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target(), collect_ids=False)

    for chunk in ('<!--a--><root xml:id="123">A<!--b-->',
                  '<sub xml:id="321"/>B</root>'):
        parser.feed(_bytes(chunk))
    result = parser.close()

    self.assertEqual("DONE", result)
    expected = [
        "comment-a", "start-root", "data-A", "comment-b",
        "start-sub", "end-sub", "data-B", "end-root",
    ]
    self.assertEqual(expected, log)
def test_parser_target_pi(self):
    """Check that a parser target receives processing instructions via pi().

    The document has a PI before, inside and after the root element; all
    three are expected in the recorded callbacks, in document order.
    """
    log = []
    record = log.append

    class Target(object):
        def start(self, tag, attrib):
            record("start-" + tag)

        def end(self, tag):
            record("end-" + tag)

        def data(self, data):
            record("data-" + data)

        def pi(self, target, data):
            record("pi-" + target + "-" + data)

        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target())
    parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
    result = parser.close()

    self.assertEqual("DONE", result)
    expected = [
        "pi-test-a", "start-root", "data-A", "pi-test-b",
        "data-B", "end-root", "pi-test-c",
    ]
    self.assertEqual(expected, log)
def test_parser_target_cdata(self):
    """Check that CDATA content reaches a parser target through data().

    The parser is created with ``strip_cdata=False``; the content of the
    CDATA section is expected to arrive as an ordinary "data-ca" callback.
    """
    log = []
    record = log.append

    class Target(object):
        def start(self, tag, attrib):
            record("start-" + tag)

        def end(self, tag):
            record("end-" + tag)

        def data(self, data):
            record("data-" + data)

        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target(), strip_cdata=False)
    parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
    result = parser.close()

    self.assertEqual("DONE", result)
    expected = [
        "start-root", "data-A", "start-a",
        "data-ca", "end-a", "data-B", "end-root",
    ]
    self.assertEqual(expected, log)
def test_parser_target_recover(self):
    """Check target callbacks when a recovering parser meets a wrong end tag.

    The input closes <root> with </not-root>.  With ``recover=True`` the
    target is still expected to see "end-root", followed by its own
    close() call, whose return value parser.close() hands back.
    """
    log = []
    record = log.append

    class Target(object):
        def start(self, tag, attrib):
            record("start-" + tag)

        def end(self, tag):
            record("end-" + tag)

        def data(self, data):
            record("data-" + data)

        def close(self):
            record("close")
            return "DONE"

    parser = self.etree.XMLParser(target=Target(), recover=True)
    parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
    result = parser.close()

    self.assertEqual("DONE", result)
    expected = [
        "start-root", "data-A", "start-a",
        "data-ca", "end-a", "data-B",
        "end-root", "close",
    ]
    self.assertEqual(expected, log)
def test_iterwalk_attrib_ns(self):
    """Set a namespaced attribute on elements while iterwalk() is running.

    Walks the tree with element and namespace events, setting
    '{testns}bla' on every started element except '{ns1}a'.  Checks the
    order of event names, that the root did not get the attribute, and
    that its first child did.
    """
    root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
    attr_name = '{testns}bla'

    seen = []
    walker = self.etree.iterwalk(
        root, events=('start', 'end', 'start-ns', 'end-ns'))
    for event, elem in walker:
        seen.append(event)
        # elem is only treated as an element for 'start' events.
        if event == 'start' and elem.tag != '{ns1}a':
            elem.set(attr_name, 'value')

    expected_events = [
        'start-ns', 'start', 'start', 'start-ns', 'start',
        'end', 'end-ns', 'end', 'end', 'end-ns',
    ]
    self.assertEqual(expected_events, seen)
    self.assertEqual(None, root.get(attr_name))
    self.assertEqual('value', root[0].get(attr_name))
def test_resolve_bytes_dtd(self):
    """Resolve a DTD from UTF-8 encoded bytes via Resolver.resolve_string().

    The custom resolver checks the requested URL and answers with a DTD
    that defines ``myentity`` as that URL.  After parsing with DTD
    validation enabled, the root text is expected to be the URL.
    """
    test_url = _str("__nosuch.dtd")
    check_equal = self.assertEqual

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            check_equal(url, test_url)
            dtd_text = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
            return self.resolve_string(dtd_text.encode('utf-8'), context)

    parser = self.etree.XMLParser(dtd_validation=True)
    parser.resolvers.add(MyResolver())

    document = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
    tree = self.etree.parse(StringIO(document), parser)
    self.assertEqual(tree.getroot().text, test_url)
def test_resolve_filelike_dtd(self):
    """Resolve a DTD from a SillyFileLike object via Resolver.resolve_file().

    The custom resolver checks the requested URL and answers with a DTD
    that defines ``myentity`` as that URL.  After parsing with DTD
    validation enabled, the root text is expected to be the URL.
    """
    test_url = _str("__nosuch.dtd")
    check_equal = self.assertEqual

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            check_equal(url, test_url)
            dtd_text = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
            return self.resolve_file(SillyFileLike(dtd_text), context)

    parser = self.etree.XMLParser(dtd_validation=True)
    parser.resolvers.add(MyResolver())

    document = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
    tree = self.etree.parse(StringIO(document), parser)
    self.assertEqual(tree.getroot().text, test_url)
def test_resolve_filename_dtd(self):
    """Resolve a DTD by file name via Resolver.resolve_filename().

    The custom resolver checks the requested URL and redirects it to
    'test.dtd' in the test directory.  The parser is created with
    ``attribute_defaults=True``; <a> and <b> are expected to carry a
    'default' attribute that is not written in the document itself.
    """
    test_url = _str("__nosuch.dtd")
    check_equal = self.assertEqual

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            check_equal(url, test_url)
            dtd_path = fileInTestDir('test.dtd')
            return self.resolve_filename(dtd_path, context)

    parser = self.etree.XMLParser(attribute_defaults=True)
    parser.resolvers.add(MyResolver())

    document = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    root = self.etree.parse(StringIO(document), parser).getroot()

    self.assertEqual(root.attrib, {'default': 'valueA'})
    self.assertEqual(root[0].attrib, {'default': 'valueB'})
def test_resolve_filename_dtd_relative(self):
    """Resolve a relative DTD reference when parsing with a base URL.

    The document is parsed with ``base_url`` set to a file URL in the test
    directory, and the resolver expects to be asked for the DTD name as a
    file URL in that same directory.  It redirects the request to
    'test.dtd'; <a> and <b> are then expected to carry a 'default'
    attribute that is not written in the document itself.
    """
    test_url = _str("__nosuch.dtd")
    check_equal = self.assertEqual

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            # The 'file://' vs 'file:' prefix depends on libxml2 version,
            # so both sides are normalised before comparing.
            wanted = fileUrlInTestDir(test_url)
            url = url.replace('file://', 'file:')
            wanted = wanted.replace('file://', 'file:')
            check_equal(url, wanted)
            dtd_url = fileUrlInTestDir('test.dtd')
            return self.resolve_filename(dtd_url, context)

    parser = self.etree.XMLParser(attribute_defaults=True)
    parser.resolvers.add(MyResolver())

    document = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    tree = self.etree.parse(
        StringIO(document), parser,
        base_url=fileUrlInTestDir('__test.xml'))
    root = tree.getroot()

    self.assertEqual(root.attrib, {'default': 'valueA'})
    self.assertEqual(root[0].attrib, {'default': 'valueB'})
def test_resolve_file_dtd(self):
    """Resolve a DTD from an open binary file via Resolver.resolve_file().

    The custom resolver checks the requested URL and answers with an open
    handle on 'test.dtd' in the test directory.  The parser is created
    with ``attribute_defaults=True``; <a> and <b> are expected to carry a
    'default' attribute that is not written in the document itself.
    """
    parse = self.etree.parse
    parser = self.etree.XMLParser(attribute_defaults=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    # Handles opened by the resolver; closed once parsing has finished so
    # the test does not leave the file open.
    opened_files = []

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            dtd_file = open(fileInTestDir('test.dtd'), 'rb')
            opened_files.append(dtd_file)
            return self.resolve_file(dtd_file, context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    try:
        tree = parse(StringIO(xml), parser)
    finally:
        # The handle can only be closed after the parser has read it;
        # closing an already closed file is harmless.
        for dtd_file in opened_files:
            dtd_file.close()

    root = tree.getroot()
    self.assertEqual(
        root.attrib, {'default': 'valueA'})
    self.assertEqual(
        root[0].attrib, {'default': 'valueB'})
def test_entity_parse(self):
    """Parse an entity reference with entity resolution switched off.

    With ``resolve_entities=False`` the reference is expected to appear as
    a child node of <doc> whose tag is the Entity factory, whose text is
    the reference as written, and whose name is the entity name.  The
    root is expected to serialise with the reference intact.
    """
    etree = self.etree
    parser = etree.XMLParser(resolve_entities=False)

    source = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
    root = etree.parse(BytesIO(source), parser).getroot()

    entity_node = root[0]
    self.assertEqual(entity_node.tag, etree.Entity)
    self.assertEqual(entity_node.text, "&myentity;")
    self.assertEqual(entity_node.tail, None)
    self.assertEqual(entity_node.name, "myentity")

    self.assertEqual(_bytes('<doc>&myentity;</doc>'),
                     etree.tostring(root))
def test_entity_restructure(self):
    """Move an element that contains an unresolved entity reference.

    The document is parsed with ``resolve_entities=False``.  Assigning the
    last child to index 0 is expected to leave ['child3', 'child2'], and
    the moved <child3> is expected to still hold its entity node with the
    original text and name.
    """
    # The reference must be spelled &nbsp; in the document (and the entity
    # declared with a character reference) so that an entity node exists
    # under <child3>; a literal space there would leave it without
    # children and the root[0][0] lookups below could not succeed.
    xml = _bytes(
        '<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>\n'
        '<root>\n'
        '<child1/>\n'
        '<child2/>\n'
        '<child3>&nbsp;</child3>\n'
        '</root>')

    parser = self.etree.XMLParser(resolve_entities=False)
    # Use self.etree like the rest of the test, not a module-level name.
    root = self.etree.fromstring(xml, parser)
    self.assertEqual([el.tag for el in root],
                     ['child1', 'child2', 'child3'])

    root[0] = root[-1]
    self.assertEqual([el.tag for el in root],
                     ['child3', 'child2'])
    # An unresolved entity node reports the reference as written.
    self.assertEqual(root[0][0].text, '&nbsp;')
    self.assertEqual(root[0][0].name, 'nbsp')
def test_cdata_tail(self):
    """Assign a CDATA object to an element's tail.

    Checked for a child element and for a root element: reading ``.tail``
    is expected to give the plain text, and serialising the root is
    expected to emit the tail as a CDATA section.
    """
    etree = self.etree

    # Tail on a child element.
    parent = etree.Element("root")
    child = etree.SubElement(parent, 'child')
    child.tail = etree.CDATA('test')
    self.assertEqual('test', child.tail)
    self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
                     etree.tostring(parent))

    # Tail on the root element itself.
    lone_root = etree.Element("root")
    lone_root.tail = etree.CDATA('test')
    self.assertEqual('test', lone_root.tail)
    self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
                     etree.tostring(lone_root))
def test_comment_parse_empty(self):
    """Parse a document containing an empty comment (``<!---->``).

    The comment is the second child of the root; its text is expected to
    be the empty string and the root is expected to serialise back to the
    input.
    """
    source = _bytes('<a><b/><!----><c/></a>')

    tree = self.etree.ElementTree(file=BytesIO(source))
    root = tree.getroot()

    self.assertEqual('', root[1].text)
    self.assertEqual(source, self.etree.tostring(root))
# ElementTree ignores comments
def test_iterancestors(self):
    """Check iterancestors() on the tree a > (b > d, c).

    Ancestors are expected nearest first, and the root has none.
    """
    etree = self.etree
    a = etree.Element('a')
    b = etree.SubElement(a, 'b')
    c = etree.SubElement(a, 'c')
    d = etree.SubElement(b, 'd')

    cases = (
        # (element, expected ancestors)
        (a, []),
        (b, [a]),
        (c, [a]),
        (d, [b, a]),
    )
    for element, ancestors in cases:
        self.assertEqual(ancestors, list(element.iterancestors()))
def test_iterancestors_tag(self):
    """Check the tag filter of iterancestors() on the tree a > (b > d, c).

    The filter is passed both positionally and as the ``tag`` keyword,
    once with the name 'a' and once with the wildcard '*'.
    """
    etree = self.etree
    a = etree.Element('a')
    b = etree.SubElement(a, 'b')
    etree.SubElement(a, 'c')
    d = etree.SubElement(b, 'd')

    only_a = [a]
    self.assertEqual(only_a, list(d.iterancestors('a')))
    self.assertEqual(only_a, list(d.iterancestors(tag='a')))

    every_ancestor = [b, a]
    self.assertEqual(every_ancestor, list(d.iterancestors('*')))
    self.assertEqual(every_ancestor, list(d.iterancestors(tag='*')))
def test_getroottree(self):
    """Check getroottree() on the tree a > (b > d, c).

    For the root, a child and a grandchild, the tree returned by
    getroottree() is expected to have ``a`` as its root.
    """
    etree = self.etree
    a = etree.Element('a')
    b = etree.SubElement(a, 'b')
    etree.SubElement(a, 'c')
    d = etree.SubElement(b, 'd')

    for element in (a, b, d):
        self.assertEqual(a, element.getroottree().getroot())
def test_getnext(self):
    """Check getnext() on a root with two children, b and c.

    The root and the last child are expected to have no next sibling;
    the next sibling of b is expected to be c.
    """
    etree = self.etree
    a = etree.Element('a')
    b = etree.SubElement(a, 'b')
    c = etree.SubElement(a, 'c')

    cases = (
        # (element, expected next sibling)
        (a, None),
        (b, c),
        (c, None),
    )
    for element, following in cases:
        self.assertEqual(following, element.getnext())
def test_getprevious(self):
    """Check getprevious() on the tree a > (b > d, c).

    The root and the first child are expected to have no previous
    sibling; the previous sibling of c is expected to be b.
    """
    etree = self.etree
    a = etree.Element('a')
    b = etree.SubElement(a, 'b')
    c = etree.SubElement(a, 'c')
    etree.SubElement(b, 'd')

    cases = (
        # (element, expected previous sibling)
        (a, None),
        (c, b),
        (b, None),
    )
    for element, preceding in cases:
        self.assertEqual(preceding, element.getprevious())