def download_single_checked(self, url: str, destination: str, prefix: str):
    """
    Download a single image, checking for failure cases.

    The url must end in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive);
    any other url is rejected without touching the network. The response body
    is streamed to a uniquely named file in ``destination``, which is then
    opened with PIL and compared against every entry of ``binary_images``
    (the known 'image not available' placeholders). The file is deleted again
    if the download, the decoding or the placeholder check fails.

    :param url: Url to attempt to download an image from
    :param destination: folder to store downloaded image in
    :param prefix: synset id or descriptor word for url
    :return: Path of the downloaded file on success, otherwise None
    """
    # rpartition splits to (`url+filename`, `.`, `filesuffix`); lower-case the
    # suffix so that e.g. `.JPG` is accepted as well.
    filetype = url.strip().rpartition('.')[2].lower()
    # require either .png, .jpg, or .jpeg
    if filetype not in ('png', 'jpg', 'jpeg'):
        return None

    # We need a naming scheme that won't overwrite anything, so use a
    # sufficiently sized random number: a uuid4 collision is practically
    # impossible even across billions of downloads.
    file = os.path.join(destination, '{}-{}.{}'.format(prefix, uuid.uuid4(), filetype))

    keep = False  # only flipped to True once the file has passed every check
    try:
        response = requests.get(url, stream=True, timeout=5)
        try:
            downloaded = response.status_code == 200
            if downloaded:
                with open(file, 'wb') as out_file:
                    # Let urllib3 undo gzip/deflate transfer encoding for us
                    response.raw.decode_content = True
                    shutil.copyfileobj(response.raw, out_file)
        finally:
            # A streamed response holds its connection until it is closed
            response.close()

        if downloaded:
            # Check we got an image not some HTML junk 404.
            # PIL is lazy - the raster data isn't loaded until needed or `load`
            # is called explicitly, so being able to open it is the criterion.
            with Image.open(file) as img:
                is_placeholder = False
                # Look through the known 'not available' images
                for bin_image in binary_images.values():
                    if img.size != bin_image['size']:
                        continue
                    # Same dimensions: compare the raster data
                    with Image.open(io.BytesIO(bin_image['raster'])) as raster:
                        # No bounding box for the difference of these images
                        # means they are identical, i.e. this is an
                        # 'image not available' image.
                        if ImageChops.difference(raster, img).getbbox() is None:
                            is_placeholder = True
                            break
                keep = not is_placeholder
    except Exception:
        # If anything above failed we're not keeping this one. Only `Exception`
        # is caught so that KeyboardInterrupt / SystemExit still propagate
        # (after the cleanup below has run).
        keep = False
    finally:
        # Never leave a partial or rejected download behind
        if not keep and os.path.isfile(file):
            os.remove(file)

    return file if keep else None
评论列表
文章目录