def open_url(connection, url, return_redirect_url=False):
    '''Fetch url over connection and return the page body.

    Any text up to and including 'goodreads.com' is stripped from url, so
    both full URLs and bare paths are accepted.  The request is attempted
    once; if it fails with HTTPException or socket.error the connection is
    closed, reopened after a one second pause, and the request is attempted
    one more time.

    Args:
        connection: object exposing request(), getresponse(), close() and
            connect() (presumably an httplib/http.client connection to
            goodreads.com -- confirm against callers).
        url: full URL or path to request.
        return_redirect_url: when true and the server answers 301 or 302,
            return the value of the Location header instead of following
            the redirect.

    Returns:
        The body as returned by response.read(), or the redirect target
        when return_redirect_url is true and a redirect was received.

    Raises:
        PageDoesNotExist: the body contains the text 'Page Not Found'.
        HTTPException, socket.error: the second attempt failed as well.
    '''
    host = 'goodreads.com'
    if host in url:
        url = url[url.find(host) + len(host):]

    try:
        result, is_redirect_url = _fetch_once(connection, url,
                                              return_redirect_url)
    except (HTTPException, socket.error):
        # Give the server a moment, then retry exactly once on a fresh
        # socket; a failure here is allowed to propagate to the caller.
        time.sleep(1)
        connection.close()
        connection.connect()
        result, is_redirect_url = _fetch_once(connection, url,
                                              return_redirect_url)

    if is_redirect_url:
        # A redirect target is handed back as-is, without the body check.
        return result

    # read() yields bytes on Python 3 and str on Python 2; pick a marker of
    # the same type so the containment test never raises TypeError.
    marker = b'Page Not Found' if isinstance(result, bytes) else 'Page Not Found'
    if marker in result:
        raise PageDoesNotExist('Page not found.')
    return result


def _fetch_once(connection, url, return_redirect_url):
    '''Send a single GET for url and resolve the reply.

    Returns a (value, is_redirect_url) pair: value is the Location header
    when a 301/302 was received and return_redirect_url is true, otherwise
    it is the page body (after following any redirect via open_url).
    '''
    connection.request('GET', url, headers=HEADERS)
    response = connection.getresponse()
    if response.status not in (301, 302):
        return response.read(), False

    location = response.msg['location']
    # The redirect reply must be fully read before the connection can carry
    # another request; otherwise the next getresponse() fails with
    # ResponseNotReady and forces a needless sleep-and-reconnect.
    response.read()
    if return_redirect_url:
        return location, True
    return open_url(connection, location), False
评论列表
文章目录