def download(self, url, headers, proxies, num_retries=None):
    """Download a URL and return the page content.

    Args:
        url (str): URL to fetch.
        headers (dict): dict of headers (like user_agent).
        proxies (dict): proxy dict w/ keys 'http'/'https', values
            are strs (i.e. 'http(s)://IP') (default: None)
        num_retries (int, optional): retries remaining for this call.
            Defaults to self.num_retries, so the configured retry
            budget is re-applied on every fresh call instead of being
            permanently consumed on the instance.

    Returns:
        dict: {'html': page text, or None on any error,
               'code': HTTP status code (500 on transport failure)}
    """
    if num_retries is None:
        num_retries = self.num_retries
    # Close the cached session (and its backend connection) on exit.
    with requests_cache.CachedSession() as session:
        session.hooks = {'response': self.make_throttle_hook(self.throttle)}
        try:
            resp = session.get(url, headers=headers, proxies=proxies,
                               timeout=self.timeout)
            html = resp.text
            if resp.status_code >= 400:
                print('Download error:', resp.text)
                html = None
                if num_retries and 500 <= resp.status_code < 600:
                    # Recursively retry 5xx HTTP errors with a local
                    # countdown; self.num_retries is left untouched.
                    return self.download(url, headers, proxies,
                                         num_retries=num_retries - 1)
        except requests.exceptions.RequestException as e:
            # Transport-level failure (DNS, timeout, refused connection...)
            # — reported with a synthetic 500 code, matching callers'
            # existing expectations.
            print('Download error:', e)
            return {'html': None, 'code': 500}
        return {'html': html, 'code': resp.status_code}
# Source file: downloader_requests_cache.py