input.py 文件源码-python代码片段

input.py 文件源码
python
阅读 24 收藏 0 点赞 0 评论 0
def HTML(text, encoding=None):
    """Parse the given HTML source and return a markup stream.

    Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be
    iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    if isinstance(text, unicode):
        # If it's unicode text the encoding should be set to None.
        # The option to pass in an incorrect encoding is for ease
        # of writing doctests that work in both Python 2.x and 3.x.
        return Stream(list(HTMLParser(StringIO(text), encoding=None)))
    return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))