def handle_images(html, detach_images=False, organization=None, **kwargs):
    """Point the ``src`` of ``<img>`` tags in ``html`` at stored images.

    The markup is parsed with ``lxml.html`` and every ``<img>`` element is
    visited:

    * a ``src`` starting with ``data:image/`` is always wrapped in an
      ``InlineImage``, stored, and replaced by the value ``store()`` returns;
    * any other ``src`` is wrapped in a ``HostedImage`` and replaced the same
      way, but only when both ``detach_images`` and ``organization`` are
      truthy; otherwise the element is left as it is.

    Args:
        html: Markup to process.
        detach_images: When truthy (together with ``organization``), images
            whose ``src`` is not a ``data:image/`` URI are stored as well.
        organization: Forwarded as the ``organization`` keyword to the image
            classes.
        **kwargs: Accepted and ignored.

    Returns:
        The re-serialized tree, decoded to ``str``.

    Raises:
        WrongHTML: If an ``<img>`` element has no ``src`` attribute.
    """
    document = lxml.html.fromstring(html)
    for node in document.cssselect('img'):
        source = node.get('src')
        if source is None:
            raise WrongHTML('<img> devrait avoir un attribut "src"')

        # Decide which wrapper stores this image; skip images left in place.
        if source.startswith('data:image/'):
            # TODO: handle ValueError
            image_cls = InlineImage
        elif detach_images and organization:
            image_cls = HostedImage
        else:
            continue

        stored_url = image_cls(source, organization=organization).store()
        node.set('src', stored_url)
    return lxml.html.tostring(document).decode()
评论列表
文章目录