def __init__(self):
    """Initialize the base parser and reset all header-tracking state.

    The base-class initializer is invoked in the form the running Python
    major version requires; afterwards every private attribute used while
    parsing is set to its starting value.
    """
    # sys.version is a free-form display string; sys.version_info is the
    # documented way to test which interpreter version is running.
    if sys.version_info[0] >= 3:
        # Python 3.x
        super().__init__(convert_charrefs=False)
    else:
        # use HTMLParser.__init__ because HTMLParser is an 'old' style class, which cannot be passed to super()
        # see http://codependentcodr.blogspot.com/2012/02/python-htmlparser-and-super.html
        HTMLParser.__init__(self)
    self._root = _HtmlHeaderNode(level=0)  # root node with no data of itself, only 'children' matters
    self._curr_node = self._root  # most recently handled header node
    self._in_header = False
    self._header_id_count = {}  # record header ids to avoid collisions
    self._html = ''  # full HTML string parsed
    self._temp_start_tag = ''  # temporary HTML start tag of this current header node
评论列表
文章目录