middleware.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:ahmia-crawler 作者: ahmia 项目源码 文件源码
def process_response(self, request, response, spider): # pylint:disable=unused-argument
        """
        Let a response through only when its Content-Type is whitelisted.

        The ``content-type`` header of ``response`` is handed, together with
        the whitelist of filtering regexes, to ``self.is_valid_response``.
        When the header is present and that check succeeds, the response is
        returned unchanged. Otherwise the response URL is logged at INFO
        level and ``IgnoreRequest`` is raised.

        ``request`` and ``spider`` are accepted but not used.
        """
        # The whitelist is fixed here; a per-spider override could instead be
        # read with getattr(spider, "response_type_whitelist", None).
        allowed_types = (r'text', )
        header_value = response.headers.get('content-type', None)

        # A missing/empty header is rejected without consulting the checker.
        rejected = not header_value or not self.is_valid_response(
            allowed_types, header_value)
        if rejected:
            notice = "Ignoring request {}, content-type was not in whitelist".format(
                response.url)
            logging.info(notice)
            raise IgnoreRequest()

        return response
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号