def _process(self, root='..'):
    """Rewrite the parsed HTML in place and render it with the theme.

    The steps run in a fixed order: the HTML is cleaned and annotated,
    link and image attributes are rewritten, the table of contents is
    built, anchors, image wrappers and YouTube replacements are applied,
    and finally the ``<body>`` is converted to a ``<div>`` and serialized
    into ``self._content`` before the template is rendered.

    Args:
        root: Value forwarded to ``process_link`` and to the template as
            ``root``. Defaults to ``'..'``.

    Returns:
        A 2-tuple ``(rendered, files)``. ``rendered`` is whatever
        ``self._theme.render`` returns; ``files`` is a list of
        ``(original_url, file_name)`` pairs, one per ``<img src>``
        encountered (presumably consumed by the caller to fetch the
        images -- confirm against the caller).
    """
    self._clean_html()
    self._annotate()

    images = []
    for node, attribute, link, _ in self._html.iterlinks():
        tag = node.tag
        if (tag == 'a' and attribute == 'href'
                and link.startswith('https://www.google.com/url')):
            node.set('href', process_link(link, root=root))
            continue
        if tag != 'img' or attribute != 'src':
            continue
        # The file name is derived from the MD5 of the source URL.
        image_name = hashlib.md5(link.encode()).hexdigest() + '.jpg'
        # Always climb to the top level (instead of using ``root``) so the
        # path also works when the document is used as an appliance.
        node.set('src', '/'.join(('..', self._id, image_name)))
        images.append((link, image_name))

    self._toc = self._get_toc()
    self._add_anchors()
    self._wrap_images()
    self._replace_youtube_videos()

    # Wrap the original body: reuse it as a <div>, or start from an empty
    # element when the document has no <body>.
    matches = self._html.xpath('//body')
    wrapper = matches[0] if matches else lxml.html.Element('body')
    wrapper.tag = 'div'
    wrapper.attrib.pop('style', None)
    self._content = lxml.etree.tostring(
        wrapper, pretty_print=True, method="html")

    rendered = self._theme.render(
        self._template + '.html',
        document=self,
        root=root,
        config=self._config,
        appliances=self._appliances,
    )
    return rendered, images
评论列表
文章目录