def process_response(self, request, response, spider):
    """Route a downloaded response through the robot-check handling.

    The decision is made in this order:

    1. If ``request.meta['crack_retry_count']`` (default 0) is greater than
       ``self.MAX_RETRY``, the request is dropped.
    2. If ``response`` is an ``HtmlResponse`` whose ``<title>`` text contains
       ``'robot check'`` (compared after stripping and lower-casing each
       text node), ``self.cracking`` is set, the ``robot_check`` stat is
       incremented, a warning is logged and the result of
       ``self.request_image`` is returned.
    3. If ``request.meta['image_request']`` is truthy, the result of
       ``self.process_image`` is returned.
    4. Otherwise ``self.cracking`` is cleared and ``response`` is returned
       unchanged.

    Args:
        request: The request that produced ``response``; its ``meta`` dict
            is read for ``crack_retry_count``, ``original_request`` and
            ``image_request``.
        response: The downloaded response.
        spider: Unused here.

    Returns:
        Whatever ``self.request_image`` / ``self.process_image`` return for
        the robot-check and image branches, otherwise ``response`` itself.

    Raises:
        IgnoreRequest: When the retry counter exceeds ``self.MAX_RETRY``.
    """
    meta = request.meta

    # Give up once the retry budget for this request has been used.
    if meta.get('crack_retry_count', 0) > self.MAX_RETRY:
        blocked = meta.get('original_request', request)
        raise IgnoreRequest('Max retries exceeded %s' % blocked)

    # Only HTML responses have a <title> that can be inspected.
    if isinstance(response, HtmlResponse):
        title_nodes = response.xpath('//title/text()').extract()
        title_text = ''.join(node.strip().lower() for node in title_nodes)
        if 'robot check' in title_text:
            self.cracking = True
            self.crawler.stats.inc_value('robot_check')
            # Report the original request that got blocked, falling back to
            # the current one when no original is recorded.
            culprit = meta.get('original_request') or request
            self.logger.warning('robot check {}'.format(culprit))
            return self.request_image(request, response)

    if meta.get('image_request', False):
        self.logger.debug('processing image {}'.format(request))
        return self.process_image(request, response)

    # Ordinary response: nothing to solve, pass it through.
    self.cracking = False
    return response
__init__.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录