middleware.py 文件源码

python
阅读 15 收藏 0 点赞 0 评论 0

项目:retr 作者: aikipooh 项目源码 文件源码
def process_request(self, request, spider):
        """Attach a proxy (and its credentials, if any) to an outgoing request.

        The optional ``proxy_action`` key is popped from ``request.meta``
        and handled first:

        * ``'disable'`` -- the request's current proxy gets status ``'D'``
          via ``self.pp.set_status`` and is removed from the meta, so a new
          proxy is picked below.
        * ``'release'`` -- the request's current proxy is handed to
          ``self.pp.release_proxy`` and the request is dropped.

        A request that still carries ``proxy`` in its meta keeps that proxy;
        only the ``Proxy-Authorization`` header is (re)applied.  Otherwise a
        proxy is obtained from ``self.pp`` according to ``self.mode`` and a
        copy of the request is returned.

        Args:
            request: The request being processed.  Its ``meta`` and
                ``headers`` are modified in place.
            spider: Not used by this method.

        Returns:
            ``None`` when the request already had a proxy.  Otherwise a copy
            (made with ``replace``) of ``request.meta['ss_request']`` when
            that entry is truthy, or of ``request`` itself, carrying the
            updated meta and ``dont_filter=True``.

        Raises:
            IgnoreRequest: When ``proxy_action`` is ``'release'``.
            KeyError: When ``proxy_action`` is ``'disable'`` or ``'release'``
                but the meta holds no ``proxy`` entry.
            ValueError: When a new proxy is needed and ``self.mode`` is
                neither ``'random'`` nor ``'sequential'``.
        """
        def set_auth(request, proxy):
            # Only proxies that carry credentials need the header.
            if proxy.creds:
                request.headers['Proxy-Authorization'] = proxy.creds

        lg.debug('in process_request: {}, {}'.format(request, request.meta))

        pa = request.meta.pop('proxy_action', None)
        if pa == 'disable':
            self.pp.set_status(self.map_proxy(request.meta['proxy']), 'D')
            del request.meta['proxy']  # Make it pick another proxy
        elif pa == 'release':
            proxy = self.map_proxy(request.meta['proxy'])
            self.pp.release_proxy(proxy)
            raise IgnoreRequest

        # Don't overwrite with a random one (server-side state for IP)
        if 'proxy' in request.meta:
            proxy = self.map_proxy(request.meta['proxy'])
            set_auth(request, proxy)
            return  # No fuss, we have a proxy already

        if self.mode == 'random':
            proxy = self.pp.get_proxy(True)
        elif self.mode == 'sequential':
            proxy = self.pp.get_proxy()
        else:
            # Without this branch ``proxy`` would stay unbound and the code
            # below would fail with an opaque UnboundLocalError.
            raise ValueError(
                'Unsupported proxy mode: {!r}'.format(self.mode))

        request.meta['proxy'] = proxy.p
        set_auth(request, proxy)

        lg.debug('Using proxy ' + proxy.p)

        # Start setup_session anew wherever we are, fresh or recurring
        req = request.meta.get('ss_request')
        if req:
            # Store original request to use after the session is setup
            if 'original_request' not in request.meta:
                request.meta['original_request'] = request
        else:
            req = request

        # Returning a request object (instead of None) hands a fresh copy
        # with the updated meta back to the caller; dont_filter=True is set
        # on that copy.
        return req.replace(meta=request.meta, dont_filter=True)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号