python类XMLParser()的实例源码

test_xml_etree.py 文件源码 项目:oil 作者: oilshell 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
# Doctest-style test case: the function has no executable body; the
# docstring holds the examples and their expected output.
# It feeds the contents of SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE (constants
# defined outside this snippet) to ET.XMLParser with a user-supplied target
# object and checks the events the target prints: start/end for the plain
# file, plus pi/comment for the namespaced file.
# The examples use the Python 2 print statement.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
test_xml_etree.py 文件源码 项目:python2-tracer 作者: extremecoders-re 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the raw docstring holds the examples.
# It checks that Element and ElementTree objects expose the listed methods,
# that the iterators returned by iter()/iterfind() have a .next method
# (Python 2 iterator protocol), and that ET.XML, ET.PI and ET.XMLParser
# compare equal to ET.fromstring, ET.ProcessingInstruction and
# ET.XMLTreeBuilder.
# check_element and check_method are helpers defined outside this snippet.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
test_xml_etree.py 文件源码 项目:python2-tracer 作者: extremecoders-re 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the docstring holds the examples.
# It parses SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE with ET.parse and writes the
# trees to sys.stdout, then feeds the raw contents of SIMPLE_XMLFILE through
# ET.XMLParser(), ET.XMLTreeBuilder() and ET.XMLParser(target=ET.TreeBuilder())
# and expects the same serialized tree from each.
# normalize_crlf and serialize are helpers defined outside this snippet.
# The examples use the Python 2 print statement.
def parsefile():
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
test_xml_etree.py 文件源码 项目:python2-tracer 作者: extremecoders-re 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
# Doctest-style test case: the function has no executable body; the
# docstring holds the examples and their expected output.
# It feeds the contents of SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE (constants
# defined outside this snippet) to ET.XMLParser with a user-supplied target
# object and checks the events the target prints: start/end for the plain
# file, plus pi/comment for the namespaced file.
# The examples use the Python 2 print statement.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
index.py 文件源码 项目:Narralyzer 作者: WillemJan 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def tei_to_chapters(fname):
    """Split a TEI XML file into chapters and collect basic metadata.

    The file is read as UTF-8, every literal ``'&nbsp'`` is removed, and the
    result is parsed with ``etree``. All elements are then visited in
    document order:

    * ``<author>`` sets the author (the last one seen wins).
    * The first ``<title type="main">`` with non-empty text sets the title.
    * ``<head rend="h2">`` starts a new chapter title and
      ``<head rend="h3">`` appends a second line to it.
    * ``<div type="chapter">`` closes the chapter collected so far (this
      also happens for the very first chapter div, so the first entry holds
      whatever front matter preceded it, possibly empty).
    * The text of any element carrying a ``rend`` attribute, and of any
      ``<p>`` element, is appended to the current chapter, one line each.

    Args:
        fname: Path of the TEI XML file to read.

    Returns:
        A tuple ``(author, title, chapters, all_text)`` where ``chapters`` is
        a list of ``[chapter_title, chapter_text]`` pairs and ``all_text`` is
        the concatenation of every chapter text. ``author`` and ``title``
        are empty strings when the document does not provide them.
    """
    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(fname, 'r', 'utf-8') as handle:
        # NOTE(review): only '&nbsp' is stripped, so the ';' of a full
        # '&nbsp;' entity stays in the text. Kept as-is for compatibility.
        data = handle.read().replace('&nbsp', '')

    utf8_parser = etree.XMLParser(encoding='utf-8')
    book = etree.fromstring(data.encode('utf-8'), parser=utf8_parser)

    all_text = u""
    chapters = []
    chap_title = ''
    text = ''
    title = ''
    # Default the author so a document without <author> no longer raises
    # UnboundLocalError at the return statement.
    author = ''

    for item in book.iter():
        tag = item.tag
        attrib = item.attrib
        has_text = item.text is not None

        if tag == 'author':
            author = item.text
        elif tag == 'title':
            if not title and attrib.get('type') == 'main':
                title = item.text
        elif tag == 'head':
            if has_text:
                rend = attrib.get('rend')
                if rend == 'h2':
                    chap_title = item.text
                elif rend == 'h3':
                    chap_title += '\n' + item.text
        elif tag == 'div':
            if attrib.get('type') == 'chapter':
                # A new chapter begins: flush what was collected so far.
                all_text += text
                chapters.append([chap_title, text])
                text = ''
                chap_title = ''

        # These two checks are deliberately independent: a <p> that also
        # carries a 'rend' attribute contributes its text twice, exactly as
        # before.
        if 'rend' in attrib and has_text:
            text += item.text + "\n"
        if tag == "p" and has_text:
            text += item.text + "\n"

    # Flush the trailing chapter. Its text must be part of all_text too;
    # previously it was added to `chapters` only.
    all_text += text
    chapters.append([chap_title, text])
    return author, title, chapters, all_text
test_xml_etree.py 文件源码 项目:pefile.pypy 作者: cloudtracer 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the raw docstring holds the examples.
# It checks that Element and ElementTree objects expose the listed methods,
# that the iterators returned by iter()/iterfind() have a .next method
# (Python 2 iterator protocol), and that ET.XML, ET.PI and ET.XMLParser
# compare equal to ET.fromstring, ET.ProcessingInstruction and
# ET.XMLTreeBuilder.
# check_element and check_method are helpers defined outside this snippet.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
test_xml_etree.py 文件源码 项目:pefile.pypy 作者: cloudtracer 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the docstring holds the examples.
# It parses SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE with ET.parse and writes the
# trees to sys.stdout, then feeds the raw contents of SIMPLE_XMLFILE through
# ET.XMLParser(), ET.XMLTreeBuilder() and ET.XMLParser(target=ET.TreeBuilder())
# and expects the same serialized tree from each.
# normalize_crlf and serialize are helpers defined outside this snippet.
# The examples use the Python 2 print statement.
def parsefile():
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
test_xml_etree.py 文件源码 项目:pefile.pypy 作者: cloudtracer 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
# Doctest-style test case: the function has no executable body; the
# docstring holds the examples and their expected output.
# It feeds the contents of SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE (constants
# defined outside this snippet) to ET.XMLParser with a user-supplied target
# object and checks the events the target prints: start/end for the plain
# file, plus pi/comment for the namespaced file.
# The examples use the Python 2 print statement.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
test_xml_etree.py 文件源码 项目:ndk-python 作者: gittor 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the raw docstring holds the examples.
# It checks that Element and ElementTree objects expose the listed methods,
# that the iterators returned by iter()/iterfind() have a .next method
# (Python 2 iterator protocol), and that ET.XML, ET.PI and ET.XMLParser
# compare equal to ET.fromstring, ET.ProcessingInstruction and
# ET.XMLTreeBuilder.
# check_element and check_method are helpers defined outside this snippet.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
test_xml_etree.py 文件源码 项目:ndk-python 作者: gittor 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
# Doctest-style test case (note the "# doctest: +ELLIPSIS" directive): the
# function has no executable body; the docstring holds the examples.
# It parses SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE with ET.parse and writes the
# trees to sys.stdout, then feeds the raw contents of SIMPLE_XMLFILE through
# ET.XMLParser(), ET.XMLTreeBuilder() and ET.XMLParser(target=ET.TreeBuilder())
# and expects the same serialized tree from each.
# normalize_crlf and serialize are helpers defined outside this snippet.
# The examples use the Python 2 print statement.
def parsefile():
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
test_xml_etree.py 文件源码 项目:ndk-python 作者: gittor 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
# Doctest-style test case: the function has no executable body; the
# docstring holds the examples and their expected output.
# It feeds the contents of SIMPLE_XMLFILE and SIMPLE_NS_XMLFILE (constants
# defined outside this snippet) to ET.XMLParser with a user-supplied target
# object and checks the events the target prints: start/end for the plain
# file, plus pi/comment for the namespaced file.
# The examples use the Python 2 print statement.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """


问题


面经


文章

微信
公众号

扫码关注公众号