def process_response(self, request, response, spider):  # pylint:disable=unused-argument
    """
    Only allow HTTP responses whose content type matches the whitelist of
    filtering regexes.

    The response's ``content-type`` header is handed, together with the
    whitelist, to ``self.is_valid_response``; the response is passed through
    only when that check succeeds.

    Args:
        request: The request that produced ``response`` (unused here).
        response: The response to inspect; its ``headers`` and ``url`` are
            read.
        spider: The spider handling the response (unused here).

    Returns:
        ``response`` unchanged when its content type is accepted.

    Raises:
        IgnoreRequest: If the ``content-type`` header is missing or empty,
            or if ``self.is_valid_response`` rejects it.
    """
    # NOTE: the whitelist is fixed here. To specify it on a per-spider basis:
    # type_whitelist = getattr(spider, "response_type_whitelist", None)
    type_whitelist = (r'text', )
    content_type_header = response.headers.get('content-type', None)

    # A missing or empty header short-circuits, so the validator is only
    # consulted when there is an actual header value to check.
    if content_type_header and self.is_valid_response(type_whitelist,
                                                      content_type_header):
        return response

    # Lazy %-style arguments: the URL is interpolated by the logging module
    # only if the record is actually emitted.
    logging.info(
        "Ignoring request %s, content-type was not in whitelist",
        response.url,
    )
    raise IgnoreRequest()
评论列表
文章目录