document.py 文件源码-python代码片段

def _process(self, root='..'):
        """Rewrite links and images in the parsed HTML, then render the page.

        The steps run in a fixed order because each one mutates ``self._html``:

        1. ``_clean_html()`` and ``_annotate()`` are applied first.
        2. Every link reported by ``iterlinks()`` is inspected:
           * ``<a href>`` values starting with ``https://www.google.com/url``
             are replaced by the result of ``process_link(url, root=root)``;
           * ``<img src>`` values are replaced by a path of the form
             ``../<self._id>/<md5-of-url>.jpg`` and the original URL is
             recorded together with that file name.
        3. The table of contents is stored in ``self._toc`` and the remaining
           helpers (``_add_anchors``, ``_wrap_images``,
           ``_replace_youtube_videos``) are applied.
        4. The ``<body>`` element is turned into a ``<div>``, its ``style``
           attribute is dropped, and the serialized result is stored in
           ``self._content``.

        :param root: passed through to ``process_link`` and to the template.
        :returns: a 2-tuple ``(rendered, files)`` where ``rendered`` is the
            value returned by ``self._theme.render`` and ``files`` is a list
            of ``(original_url, file_name)`` pairs, one per rewritten image.
        """
        self._clean_html()
        self._annotate()

        downloads = []
        for node, attribute, link, _pos in self._html.iterlinks():
            target = (node.tag, attribute)
            if target == ('a', 'href') and link.startswith('https://www.google.com/url'):
                node.set('href', process_link(link, root=root))
            elif target == ('img', 'src'):
                # The file name is derived from the URL hash; the extension
                # is always '.jpg' regardless of the source URL.
                image_name = hashlib.md5(link.encode()).hexdigest() + '.jpg'
                # Climb one level before entering the document directory so
                # the path also resolves when the document is used as an
                # appliance.
                node.set('src', '../' + self._id + '/' + image_name)
                downloads.append((link, image_name))

        self._toc = self._get_toc()
        self._add_anchors()
        self._wrap_images()
        self._replace_youtube_videos()

        # Wrap the original body: reuse the first <body> element when there
        # is one, otherwise start from an empty element.
        matches = self._html.xpath('//body')
        wrapper = matches[0] if matches else lxml.html.Element('body')
        wrapper.tag = 'div'
        wrapper.attrib.pop('style', None)

        self._content = lxml.etree.tostring(wrapper, pretty_print=True, method="html")

        rendered = self._theme.render(
            self._template + '.html',
            document=self,
            root=root,
            config=self._config,
            appliances=self._appliances,
        )
        return rendered, downloads