downloadermiddlewares.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:structure_spider 作者: ShichaoMa 项目源码 文件源码
def _retry(self, request, reason, spider):
    """Build a retry copy of a failed request, or give up and drop it.

    Flags the spider to rotate its proxy, then chooses one of three paths:

    - requests marked ``meta["if_next_page"]`` (pagination) are re-yielded
      unconditionally — note they do NOT increment ``retry_times``, so this
      path never exhausts the retry budget by design;
    - otherwise, while ``retries <= self.max_retry_times``, a copy with an
      incremented ``retry_times`` and adjusted priority is returned;
    - past the limit, failure stats are recorded and ``IgnoreRequest`` is
      raised to drop the request.

    :param request: the failed request; retry state lives in ``request.meta``
    :param reason: exception instance or textual reason for the failure
    :param spider: running spider; its ``change_proxy`` flag is set here
    :returns: a copied request to re-schedule
    :raises IgnoreRequest: when the retry limit is exhausted
    """
    # Ask the proxy middleware to switch proxies before the next attempt.
    spider.change_proxy = True
    retries = request.meta.get('retry_times', 0) + 1

    if request.meta.get("if_next_page"):
        self.logger.debug("in _retry re-yield next_pages request: %s, reason: %s. " % (request.url, reason))
        return request.copy()
    elif retries <= self.max_retry_times:
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        # Bypass the dupefilter: the original URL was already seen.
        retryreq.dont_filter = True
        # BUGFIX: the original did retryreq.meta['priority'] + settings.get(...),
        # which raises KeyError when 'priority' is not in meta and TypeError
        # when the setting is undefined (settings.get returns None). Default
        # both sides safely; getint also coerces string-valued settings.
        adjust = self.crawler.settings.getint("REDIRECT_PRIORITY_ADJUST", 0)
        retryreq.meta['priority'] = retryreq.meta.get('priority', 0) + adjust
        self.logger.debug("in _retry retries times: %s, re-yield request: %s, reason: %s" % (
            retries, request.url, reason))
        return retryreq
    else:
        # Retry budget exhausted. Only top-level "parse" callbacks count
        # toward the crawl's total-pages stat (presumably detail/sub-requests
        # are excluded on purpose — confirm against the stats collector).
        if request.meta.get("callback") == "parse":
            spider.crawler.stats.inc_total_pages(crawlid=request.meta['crawlid'])
        self.logger.error(
            "retry request error to failed pages url:%s, exception:%s, meta:%s" % (
                request.url, reason, request.meta))
        self.logger.info("Gave up retrying %s (failed %d times): %s" % (request.url, retries, reason))
        raise IgnoreRequest("%s %s" % (reason, "retry %s times. " % retries))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号