def _download(self, request, spider):
    """Fetch ``request`` over HTTP and queue the resulting response.

    Calls ``self._process_request(request, spider)``, then issues a GET or
    POST through ``requests`` using the request's cookies, headers and proxy
    together with ``self.download_timeout``. On success a
    ``(Response, spider)`` tuple is put on ``self._responses_queue``.

    Failures inside the download attempt are handled here rather than
    raised: read/connect timeouts and connection errors go to the retry
    helper, an unsupported HTTP method is logged and the request is
    dropped, and any other exception is logged with its traceback.

    Args:
        request: Object exposing ``method``, ``url``, ``cookies``,
            ``headers``, ``proxy`` and, for POST, ``data``.
        spider: Spider that issued the request; ``spider.name`` is used in
            log lines and the spider is queued alongside the response.

    Returns:
        None.
    """

    def _retry():
        # Re-schedule the request only when retrying is enabled; otherwise
        # the failed request is silently dropped.
        if self.retry_on_download_timeout:
            self.logger.debug('Read timed out, retry request {}'.format(request))
            self.crawl(request, spider)

    try:
        self._process_request(request, spider)
        # NOTE(review): the return value of _process_request is not
        # captured, so this guard only fires when the caller itself passed
        # None -- confirm whether `request = self._process_request(...)`
        # was intended.
        if request is None:
            return

        method = request.method.upper()
        kw_params = {
            'timeout': self.download_timeout,
            'cookies': request.cookies,
            'headers': request.headers,
            # The same proxy is used for both URL schemes.
            'proxies': {
                'http': request.proxy,
                'https': request.proxy,
            },
        }
        self.logger.debug('[{}]<{} {}>'.format(spider.name, method, request.url))

        if method == 'GET':
            resp = requests.get(request.url, **kw_params)
        elif method == 'POST':
            resp = requests.post(request.url, request.data, **kw_params)
        else:
            # Previously this fell through with no response object and
            # surfaced as an opaque AttributeError on None; report the
            # actual cause instead. The request is dropped, not retried.
            self.logger.error(
                '[{}]Unsupported HTTP method {!r} for {}'.format(
                    spider.name, method, request.url))
            return

        self._responses_queue.put(
            (Response(resp.url, resp.status_code, resp.content, request,
                      resp.cookies), spider))
    except (requests.ReadTimeout, requests.ConnectTimeout, requests.ConnectionError):
        _retry()
    except Exception as err:
        self.logger.error(err, exc_info=True)
评论列表
文章目录