def download_image(url, save_dir, loaded_urls=None):
    """Fetch the image at *url* and store it as a file inside *save_dir*.

    The request is sent with a browser-like User-Agent header. The URL
    reported by the response (``response.geturl()``) is the one that is
    checked against *loaded_urls* and hashed (MD5) to build the file name;
    the file extension is taken from ``CONFIGS[u'search_file_type']``.

    Args:
        url: Address of the image to request.
        save_dir: Directory path the image file is written into. It is
            joined to the file name with '/', and is not created here.
        loaded_urls: Optional collection of URLs that were already
            downloaded; a response whose URL is in it is skipped.

    Returns:
        A ``(real_url, save_image_name)`` tuple when the image was written
        to disk. ``(None, None)`` when the URL was already in *loaded_urls*
        or when an HTTPError, URLError or IOError occurred (the error is
        printed, not raised).
    """
    user_agent = (
        "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 "
        "(KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
    )
    # Only replaced by real values once the file has been written, so a
    # failure half-way through is never reported to the caller as a success.
    result = (None, None)
    response = None
    try:
        req = Request(url, headers={"User-Agent": user_agent})
        response = urlopen(req)
        real_url = response.geturl()
        if loaded_urls and real_url in loaded_urls:
            print('URL had been downloaded in previous searching')
        else:
            # md5() needs bytes; encode text URLs, pass byte strings through.
            if isinstance(real_url, bytes):
                url_bytes = real_url
            else:
                url_bytes = real_url.encode('utf-8')
            img_name = hashlib.md5(url_bytes).hexdigest()
            save_image_name = (save_dir + '/' + img_name + '.'
                               + CONFIGS[u'search_file_type'])
            print('Try to save image ' + real_url
                  + ' into file: ' + save_image_name)
            # Read before opening the output file so a failed read does not
            # leave an empty file behind.
            data = response.read()
            with open(save_image_name, 'wb') as output_file:
                output_file.write(data)
            result = (real_url, save_image_name)
    # Most specific first: HTTPError derives from URLError, which derives
    # from IOError, so the reverse order would make these two unreachable.
    except HTTPError as e:
        print("HTTPError on url " + str(url))
        print(e)
    except URLError as e:
        print("URLError on url " + str(url))
        print(e)
    except IOError as e:
        print("IOError on url " + str(url))
        print(e)
    finally:
        # Release the connection on every path, including uncaught errors.
        if response is not None:
            response.close()
    return result
############## End of Functions to get real urls and download images ############
############## Main Program ############
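# [page residue] "Comment list" -- web-page navigation text captured with the code; not part of the program.
# [page residue] "Table of contents" -- web-page navigation text captured with the code; not part of the program.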