def process_request(self, request, spider):
    """Count a request against its host and reject it once the host's cap is hit.

    The host is extracted from ``request.url``. ``self.counter`` maps each
    host to the number of requests let through so far, and
    ``self.max_pages`` is the per-host cap.

    Args:
        request: Object exposing the target URL as ``request.url``.
        spider: Object exposing a ``logger`` used to report the running count.

    Returns:
        None when the request is within the limit; the host's counter has
        been incremented by one in that case.
        NOTE(review): presumably this is a Scrapy downloader-middleware hook,
        where returning None lets the request proceed -- confirm.

    Raises:
        IgnoreRequest: If ``self.max_pages`` requests have already been
            counted for this host. The counter is left unchanged.
    """
    # ``hostname`` is None for URLs without a network location, so every
    # such URL shares one counter bucket.
    host = urlparse.urlparse(request.url).hostname

    # .get() keeps this working whether ``counter`` is a defaultdict or a
    # plain dict that has not seen this host yet.
    seen = self.counter.get(host, 0)
    if seen >= self.max_pages:
        raise IgnoreRequest('MAX_PAGES_PER_DOMAIN reached, filtered %s' % request.url)

    seen += 1
    self.counter[host] = seen
    # Hand the arguments to the logger so the message is only formatted
    # when INFO records are actually emitted.
    spider.logger.info('Page count is %d for %s', seen, host)
    return None
评论列表
文章目录