def get_clean_html(etree, text_only=False):
    """Run an lxml ``Cleaner`` over *etree* and return the cleaned result.

    The cleaner is configured to strip JavaScript and CSS while leaving
    page structure, meta tags, links, and arbitrary attributes in place.

    Args:
        etree: The document to clean. It is first handed to ``_is_etree``,
            whose return value is ignored (presumably it raises on
            invalid input -- verify against its definition).
        text_only: When true, return ``text_content()`` of the cleaned
            document instead of serialized markup.

    Returns:
        The result of ``text_content()`` on the cleaned document when
        *text_only* is true, otherwise the output of
        ``lxml.html.tostring`` for the cleaned document.
    """
    _is_etree(etree)

    # Options are assigned on the instance after a no-argument construction.
    # NOTE(review): passing these as constructor keywords may not be
    # equivalent (the constructor may validate names or derive other options
    # from them) -- confirm against the lxml docs before changing this.
    # NOTE(review): "html" does not look like a documented Cleaner option;
    # it is kept as-is -- confirm whether it has any effect.
    option_values = {
        "javascript": True,
        "style": True,
        "html": True,
        "page_structure": False,
        "meta": False,
        "safe_attrs_only": False,
        "links": False,
    }

    sanitizer = Cleaner()
    for option_name, option_value in option_values.items():
        setattr(sanitizer, option_name, option_value)

    cleaned = sanitizer.clean_html(etree)
    return cleaned.text_content() if text_only else lxml.html.tostring(cleaned)
评论列表
文章目录