download.py 文件源码

python
阅读 62 收藏 0 点赞 0 评论 0

项目:Imagyn 作者: zevisert 项目源码 文件源码
def download_single_checked(self, url: str, destination: str, prefix: str):
        """
        Download a single image, keeping it only if it passes validation.

        The url must end in a ``png``, ``jpg`` or ``jpeg`` suffix (compared
        case-insensitively). The response body is streamed into a uniquely
        named file inside ``destination``, then opened with PIL and compared
        against the known 'image not available' placeholders held in
        ``binary_images``. A file that is not kept is removed again.

        :param url: Url to attempt to download an image from
        :param destination: folder to store downloaded image in
        :param prefix: synset id or descriptor word for url
        :return: Path of the downloaded file, or None if nothing was kept
        """
        # splits to (`url+filename`, `.`, `filesuffix`); lower-cased so that
        # `.JPG` / `.PNG` urls are accepted as well
        filetype = url.strip().rpartition('.')[2].lower()

        # require either .png, .jpg, or .jpeg - reject early, before touching
        # the network or the filesystem
        if filetype not in ('png', 'jpg', 'jpeg'):
            return None

        # We need a naming scheme that won't overwrite anything, so use a
        # sufficiently sized random number: a uuid4 collision is improbable
        # enough that it can be ignored here.
        file = os.path.join(destination, '{}-{}.{}'.format(prefix, uuid.uuid4(), filetype))

        # Only flipped to True once every check has passed, so an interruption
        # at any earlier point still cleans up the partial download.
        keep = False
        try:
            downloaded = False

            # Get the file; the context manager releases the streamed
            # connection even when the copy fails part way through
            with requests.get(url, stream=True, timeout=5) as response:
                if response.status_code == 200:
                    with open(file, 'wb') as out_file:
                        response.raw.decode_content = True
                        shutil.copyfileobj(response.raw, out_file)
                    downloaded = True

            if downloaded:
                # Check we got an image not some HTML junk 404.
                # Logic here is that if we can interpret the image then its good.
                # PIL is lazy - the raster data isn't loaded until needed or
                # `load` is called explicitly.
                with Image.open(file) as img:
                    is_placeholder = False

                    # Look through the known 'not available images'
                    for bin_image in binary_images.values():
                        # Only images of identical size can be the same image
                        if img.size != bin_image['size']:
                            continue

                        # Compare the raster data
                        with Image.open(io.BytesIO(bin_image['raster'])) as raster:
                            # ImageChops.difference raises for mismatched
                            # images; a different mode means this is not a
                            # byte-for-byte copy of the placeholder, so treat
                            # it as 'no match' instead of failing the download
                            if raster.mode != img.mode:
                                continue

                            if ImageChops.difference(raster, img).getbbox() is None:
                                # No bounding box for the difference of these
                                # images, so this is an 'image not available' image
                                is_placeholder = True
                                break

                    keep = not is_placeholder

        # If anything above failed we're not keeping this one. Only `Exception`
        # is caught so KeyboardInterrupt / SystemExit still propagate.
        except Exception:
            keep = False
        finally:
            # Cleanup only - returning from `finally` would swallow exceptions
            if not keep and os.path.isfile(file):
                os.remove(file)

        # Return the name of the downloaded file, or None if it was discarded
        return file if keep else None
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号