def HTML(text, encoding=None):
"""Parse the given HTML source and return a markup stream.
Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be
iterated over multiple times:
>>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
>>> print(html)
<body><h1>Foo</h1></body>
>>> print(html.select('h1'))
<h1>Foo</h1>
>>> print(html.select('h1/text()'))
Foo
:param text: the HTML source
:return: the parsed XML event stream
:raises ParseError: if the HTML text is not well-formed, and error recovery
fails
"""
if isinstance(text, unicode):
# If it's unicode text the encoding should be set to None.
# The option to pass in an incorrect encoding is for ease
# of writing doctests that work in both Python 2.x and 3.x.
return Stream(list(HTMLParser(StringIO(text), encoding=None)))
return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
评论列表
文章目录