def test_allow_tags(self):
    """Check how many elements remain after cleaning with ``allow_tags``.

    A small document (a paragraph, a table of two rows with two cells
    each, and an image) is parsed and passed through a ``Cleaner`` built
    with ``remove_unknown_tags=False`` and
    ``allow_tags=['table', 'tr', 'td']``.  Iterating over the cleaned
    tree must yield exactly ``12 - 5 + 1`` (i.e. 8) elements.
    """
    markup = """
<html>
<head>
</head>
<body>
<p>some text</p>
<table>
<tr>
<td>hello</td><td>world</td>
</tr>
<tr>
<td>hello</td><td>world</td>
</tr>
</table>
<img>
</body>
</html>
"""
    # NOTE(review): presumably 12 elements in the parsed document, minus
    # the 5 tags outside the allow-list (html, head, body, p, img), plus 1
    # for a root element that is still present in the result -- confirm
    # against the Cleaner implementation.
    expected_count = 12 - 5 + 1

    document = lxml.html.document_fromstring(markup)
    permitted = ['table', 'tr', 'td']
    cleaner = Cleaner(remove_unknown_tags=False, allow_tags=permitted)
    cleaned = cleaner.clean_html(document)

    # Count every element reachable from the cleaned root (root included).
    remaining = sum(1 for _ in cleaned.iter())
    self.assertEqual(expected_count, remaining)
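# Non-code page text left over from the source web page
# ("Comment list" / "Article contents"); not part of the test code.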