# Decide whether a redirect may be followed, using a hop counter kept in the
# request's ``meta``.
#
# Parameters:
#   redirected -- request for the redirect target; entries in its ``meta`` are
#                 assigned here and the object is returned when the redirect
#                 is allowed.
#   request    -- the request that was redirected; its ``meta`` and ``url`` are
#                 read.
#   spider     -- not used anywhere in this body.
#   reason     -- passed through response_status_message() and then used in
#                 the log and exception messages.
#
# Returns:
#   ``redirected`` when the incremented hop count is <= self.max_redirect_times.
#
# Raises:
#   IgnoreRequest when the incremented hop count exceeds
#   self.max_redirect_times.
#
# NOTE(review): this text has lost its indentation, so block nesting cannot be
# read from it. In particular, whether the self.logger.error(...) call near the
# end belongs inside the `callback == "parse"` check or runs for every
# discarded request cannot be determined from here -- confirm against the
# properly indented file before changing this code.
def _redirect(self, redirected, request, spider, reason):
# NOTE(review): response_status_message is defined outside this block;
# presumably it turns a status code into readable text -- confirm.
reason = response_status_message(reason)
# Hop count including this redirect; a request with no counter starts from 0.
redirects = request.meta.get('redirect_times', 0) + 1
if redirects <= self.max_redirect_times:
redirected.meta['redirect_times'] = redirects
# `+` builds a new list: the previously recorded URLs followed by this
# request's URL.
redirected.meta['redirect_urls'] = request.meta.get('redirect_urls', []) + \
[request.url]
# 'priority' is read by direct indexing (no default is supplied) and then
# shifted by self.priority_adjust.
redirected.meta['priority'] = redirected.meta['priority'] + self.priority_adjust
self.logger.debug("Redirecting %s to %s from %s for %s times " % (
reason, redirected.url, request.url, redirected.meta.get("redirect_times")))
return redirected
else:
# Hop limit exceeded: log, update stats for "parse" callbacks, then abort
# the request by raising.
self.logger.info("Discarding %s: max redirections reached" % request.url)
# Only requests whose meta 'callback' equals "parse" trigger the stats call;
# 'crawlid' is read by direct indexing (no default is supplied).
if request.meta.get("callback") == "parse":
self.crawler.stats.inc_total_pages(crawlid=request.meta['crawlid'])
# NOTE(review): nesting of this error log relative to the `if` above is
# unknown (see the note at the top of this function).
self.logger.error(
" in redicrect request error to failed pages url:%s, exception:%s, meta:%s" % (
request.url, reason, request.meta))
raise IgnoreRequest("max redirections reached:%s" % reason)
downloadermiddlewares.py 文件源码
python
阅读 15
收藏 0
点赞 0
评论 0
评论列表
文章目录