dhtml.py 文件源码-python代码片段

dhtml.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

def read(self):
        """Parse ``self.filename`` and load its title and tracked elements.

        The file is opened in binary mode and handed to BeautifulSoup (lxml
        backend).  Parsing is restricted to the ``<title>`` tag and to
        ``<div>`` tags whose ``id`` attribute is listed in ``self.ids``.

        Side effects on ``self``:
          * ``title`` -- the entity-unescaped contents of ``<title>``, or
            ``_('Untitled')`` when the document has no title
            (``_`` is presumably a gettext-style translation function --
            confirm against the module's imports).
          * ``elements[an_id]`` -- the inner markup of the element with that
            id, or ``''`` when no such element was found, for every id in
            ``self.ids``.
          * ``original_encoding`` -- the encoding reported by BeautifulSoup.

        Raises:
            ValueError: if more than one parsed element carries the same id
                (the single-element unpacking below requires exactly one).
        """
        # HTMLParser.unescape() was deprecated in Python 3.4 and removed in
        # 3.9; html.unescape() is its replacement.  Fall back to the legacy
        # method only where html.unescape cannot be imported.
        try:
            from html import unescape
        except ImportError:
            unescape = html_parser.HTMLParser().unescape

        def strain(name, attrs):
            # Keep only the <title> tag and the <div>s whose id we track.
            if name == 'title':
                return True
            return name == 'div' and dict(attrs).get('id', None) in self.ids

        with io.open(self.filename, 'rb') as dhtml_file:
            soup = BeautifulSoup(dhtml_file, "lxml", parse_only=SoupStrainer(strain))
            self.title = unescape(soup.title.decode_contents()) if soup.title else _('Untitled')
            for an_id in self.ids:
                found_elements = soup.find_all(id=an_id)
                if found_elements:
                    # Deliberately strict: unpacking fails with ValueError
                    # when the id is not unique among the parsed elements.
                    [element] = found_elements
                    self.elements[an_id] = element.decode_contents()
                else:
                    self.elements[an_id] = ''
            self.original_encoding = soup.original_encoding