def get_real_url(url, loaded_urls):
    """Open ``url`` and return the URL reported by the response.

    The request is sent with a desktop-browser ``User-Agent`` header. The
    value of ``response.geturl()`` (the address actually retrieved, which
    differs from ``url`` when a redirect was followed) is treated as the
    real URL.

    Args:
        url: Address to open.
        loaded_urls: Container of URLs that were already handled; it is only
            tested with ``in``.

    Returns:
        The URL reported by the response, or ``None`` when that URL is
        already present in ``loaded_urls`` or when opening the URL fails
        with ``HTTPError``, ``URLError`` or ``IOError``. Any other exception
        propagates to the caller.
    """
    real_url = None
    response = None
    try:
        req = Request(url, headers={"User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
        response = urlopen(req)
        real_url = response.geturl()
        print('Real_url is: ' + str(real_url))
        if real_url in loaded_urls:
            print('URL had been downloaded in previous ')
            real_url = None
    # Handlers are ordered most-specific first: HTTPError derives from
    # URLError, which derives from IOError, so catching IOError first would
    # make the two more specific handlers unreachable.
    except HTTPError as e:  # The server answered with an HTTP error status
        print("HTTPError on url "+str(url))
        print(e)
    except URLError as e:  # The URL could not be opened at all
        print("URLError on url "+str(url))
        print(e)
    except IOError as e:  # Any remaining I/O failure
        print("IOError on url "+str(url))
        print(e)
    finally:
        # Close the response even when an uncaught exception is propagating.
        if response is not None:
            response.close()
    return real_url
# Comment list
# Article table of contents