# Module-level import needed for the final branch below.
from scrapy.exceptions import IgnoreRequest


def _retry(self, request, reason, spider):
    # Signal the spider to switch to a fresh proxy before the next attempt.
    spider.change_proxy = True
    retries = request.meta.get('retry_times', 0) + 1
    if request.meta.get('if_next_page'):
        # "Next page" requests are re-yielded as-is and do not count
        # against the retry limit.
        self.logger.debug("in _retry re-yield next_pages request: %s, reason: %s",
                          request.url, reason)
        return request.copy()
    elif retries <= self.max_retry_times:
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        retryreq.dont_filter = True
        # Adjust the retried request's priority. Fall back to the request's
        # own priority if none was stored in meta, and read the adjustment
        # with getint() so a string-valued setting cannot break the addition.
        retryreq.meta['priority'] = (retryreq.meta.get('priority', request.priority)
                                     + self.crawler.settings.getint('REDIRECT_PRIORITY_ADJUST'))
        self.logger.debug("in _retry retries times: %s, re-yield request: %s, reason: %s",
                          retries, request.url, reason)
        return retryreq
    else:
        # Retries exhausted: record the failed page and drop the request.
        if request.meta.get('callback') == 'parse':
            spider.crawler.stats.inc_total_pages(crawlid=request.meta['crawlid'])
        self.logger.error("retry request error to failed pages url: %s, exception: %s, meta: %s",
                          request.url, reason, request.meta)
        self.logger.info("Gave up retrying %s (failed %d times): %s",
                         request.url, retries, reason)
        raise IgnoreRequest("%s (retried %d times)" % (reason, retries))
Source file: downloadermiddlewares.py (Python)
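For context, this `_retry` helper mirrors Scrapy's built-in `RetryMiddleware._retry` and is meant to be called from a downloader middleware's `process_response`/`process_exception` hooks. Below is a minimal sketch of that wiring, assuming the method above lives on the same class; the class name `CustomRetryMiddleware` and the exact settings read in `__init__` are illustrative assumptions, not part of the source file.

# Minimal sketch (not from the source file) of a downloader middleware
# that invokes a _retry helper like the one above.
import logging

from scrapy.utils.response import response_status_message


class CustomRetryMiddleware(object):  # hypothetical class name
    def __init__(self, crawler):
        self.crawler = crawler
        self.logger = logging.getLogger(__name__)
        settings = crawler.settings
        self.max_retry_times = settings.getint('RETRY_TIMES')
        self.retry_http_codes = set(
            int(x) for x in settings.getlist('RETRY_HTTP_CODES'))

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def process_response(self, request, response, spider):
        # Retry on configured HTTP error codes unless retries are
        # explicitly disabled for this request.
        if request.meta.get('dont_retry', False):
            return response
        if response.status in self.retry_http_codes:
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response
        return response

    def process_exception(self, request, exception, spider):
        # Retry on download errors (timeouts, connection failures, ...).
        if not request.meta.get('dont_retry', False):
            return self._retry(request, repr(exception), spider)

    # ... the _retry method shown above goes here ...

Note that when `_retry` exhausts its attempts it raises `IgnoreRequest`, so the request is dropped outright and the `or response` fallback in `process_response` only applies while retries remain.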