def download_page(url, referer, maxretries, timeout, pause):
    """Download ``url`` and return its body decoded as UTF-8 plus a status code.

    The request carries the given ``Referer`` header and a fixed
    desktop-browser ``User-agent``.  An attempt that raises ``URLError``,
    ``socket.timeout`` or ``socket.error`` is counted as a failure and
    retried immediately, until ``maxretries`` attempts have failed.

    Args:
        url: Address of the page to download.
        referer: Value sent in the ``Referer`` request header.
        maxretries: Number of failed attempts tolerated before giving up.
            With zero or a negative value no request is made at all.
        timeout: Timeout handed to ``urlopen`` for every attempt.
        pause: Delay handed to ``sleep`` after a successful download.

    Returns:
        A ``(text, code)`` tuple.  ``text`` is the decoded page, or ``None``
        when no attempt succeeded or the body was empty.  ``code`` is the
        value of ``getcode()`` on the response, or 404 when the last attempt
        failed before a status could be read (or no attempt was made).

    Raises:
        UnicodeDecodeError: If the downloaded body is not valid UTF-8.
    """
    user_agent = (
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) '
        'Ubuntu/11.04 Chromium/12.0.742.91 Chrome/12.0.742.91 Safari/534.30'
    )

    # Bind the fallback status before the loop: with maxretries <= 0 the loop
    # body never runs, and the return below must still have a value to report.
    code = 404
    tries = 0
    htmlpage = None
    while tries < maxretries and htmlpage is None:
        # Every attempt starts from the fallback again, so a status read
        # during an earlier, failed attempt is not carried over.
        # NOTE: any failure is reported as 404; the real status of an HTTP
        # error response is not propagated to the caller.
        code = 404
        try:
            req = request.Request(url)
            req.add_header('Referer', referer)
            req.add_header('User-agent', user_agent)
            with closing(request.urlopen(req, timeout=timeout)) as f:
                code = f.getcode()
                htmlpage = f.read()
            # Only reached after a successful read; failed attempts are
            # retried without any delay.
            sleep(pause)
        except (urlerror.URLError, socket.timeout, socket.error):
            tries += 1

    # An empty body is falsy and is therefore reported as "no page".
    if not htmlpage:
        return None, code
    return htmlpage.decode('utf-8'), code
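# Comment list (navigation residue from the source web page, not code)
# Article table of contents (navigation residue from the source web page, not code)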