def process_request(self, request, spider):  # pylint:disable=unused-argument
    """Drop requests whose domain is listed in ``BANNED_DOMAINS``.

    The network location (``netloc``) of ``request.url`` is MD5-hashed and
    the hex digest is looked up in the ``BANNED_DOMAINS`` setting.

    Args:
        request: The outgoing request; only ``url`` and ``meta`` are used.
        spider: Unused.

    Returns:
        None when the domain is not banned.

    Raises:
        IgnoreRequest: If the hashed domain is found in ``BANNED_DOMAINS``.
    """
    # netloc is already the URL without scheme and path, so no manual
    # stripping of "http://", "https://" or "/" is needed.
    domain = urlparse(request.url).netloc

    # hashlib only accepts bytes-like input; encode text before hashing.
    if not isinstance(domain, bytes):
        domain = domain.encode("utf-8")
    domain_digest = hashlib.md5(domain).hexdigest()

    # A missing setting is treated as "nothing is banned".
    banned_domains = settings.get('BANNED_DOMAINS') or ()
    if domain_digest in banned_domains:
        # Do not execute this request: blank its proxy entry and drop it.
        request.meta['proxy'] = ""
        logging.info(
            "Ignoring request %s, This domain is banned.", request.url
        )
        raise IgnoreRequest()
评论列表
文章目录