import_html.py 文件源码-python代码片段

import_html.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

def download_html_image(url, html, image_path):
    """Download the images referenced by ``html`` and rewrite their ``src``.

    Every ``<img>`` tag that carries a non-empty ``src`` is processed:

    * when ``url`` is truthy the ``src`` is resolved through
      ``full_url(url, src)``; otherwise it is used as-is;
    * a file name ``/<image_path>/<uuid1 hex><ext>`` is generated, where
      ``ext`` is the extension of the source with any query string or
      fragment removed;
    * ``download_file`` is called with the source and
      ``current_app.root_path`` + that file name;
    * if ``download_file`` returns a truthy value the tag's ``src`` becomes
      ``current_app.config["UPLOADIMG_HOST"]`` + the file name, otherwise it
      becomes the (possibly resolved) source.

    ``<img>`` tags without a ``src`` attribute, or with an empty one, are left
    untouched.

    :param url: base URL passed to ``full_url``; a falsy value disables
        resolution.
    :param html: HTML markup to parse.
    :param image_path: directory segment placed between the application root
        and the generated file name.
    :return: the rewritten document as a ``unicode`` string (Python 2).
    """
    soup = BeautifulSoup(html, "html.parser")
    for img in soup.select("img"):
        raw_src = img.get("src")
        if not raw_src:
            # Indexing img["src"] raises KeyError when the attribute is
            # missing; there is nothing to download for such a tag anyway.
            continue

        src = full_url(url, raw_src) if url else raw_src

        # Take the extension from the path only, so that "a.png?w=100" yields
        # ".png" instead of leaking "?w=100" into the stored file name.
        path_only = src.split("?", 1)[0].split("#", 1)[0]
        _, ext = os.path.splitext(path_only)

        filename = "/{0}/{1}{2}".format(image_path, uuid.uuid1().hex, ext)
        full_filename = "{0}{1}".format(current_app.root_path, filename)
        hosted_src = "{0}{1}".format(
            current_app.config["UPLOADIMG_HOST"], filename)

        # NOTE(review): a truthy return from download_file is treated as
        # success -- confirm against its definition.
        if download_file(src, full_filename):
            img["src"] = hosted_src
        else:
            img["src"] = src
    # ``unicode`` is the Python 2 builtin; this module targets Python 2.
    return unicode(soup)