def read(self):
    """Parse the file at ``self.filename`` and cache its title and selected elements.

    The file is opened in binary mode and parsed with BeautifulSoup (lxml
    parser). Parsing is restricted to the ``<title>`` element and to
    ``<div>`` elements whose ``id`` is in ``self.ids``.

    Side effects on ``self``:

    * ``title`` -- the unescaped contents of ``<title>``, or the translated
      string ``'Untitled'`` when the parsed document has no title.
    * ``elements[an_id]`` -- for every id in ``self.ids``, the decoded
      contents of the matching element, or ``''`` when none was found.
    * ``original_encoding`` -- copied from the parsed soup.

    Raises:
        ValueError: if more than one parsed element carries the same id
            (the single-element unpacking below fails).
    """
    try:
        # html.unescape is the supported API: HTMLParser.unescape was
        # deprecated in Python 3.4 and removed in Python 3.9.
        from html import unescape
    except ImportError:
        # Interpreters without html.unescape: fall back to the legacy call.
        unescape = html_parser.HTMLParser().unescape

    def strain(name, attrs):
        # Keep only the <title> and the <div>s whose id was asked for.
        return name == 'title' or (name == 'div' and dict(attrs).get('id') in self.ids)

    # The file only needs to stay open while BeautifulSoup consumes it.
    with io.open(self.filename, 'rb') as dhtml_file:
        soup = BeautifulSoup(dhtml_file, "lxml", parse_only=SoupStrainer(strain))

    self.title = unescape(soup.title.decode_contents()) if soup.title else _('Untitled')

    for an_id in self.ids:
        found_elements = soup.find_all(id=an_id)
        if found_elements:
            # Unpacking into a one-item list insists on exactly one match;
            # duplicate ids surface as ValueError instead of being ignored.
            [element] = found_elements
            self.elements[an_id] = element.decode_contents()
        else:
            self.elements[an_id] = ''

    self.original_encoding = soup.original_encoding
评论列表
文章目录