def process_request(self, request, spider):
    """Make sure *request* goes out through a proxy, with credentials if any.

    NOTE(review): the signature, ``request.meta``, ``request.replace`` and
    ``IgnoreRequest`` suggest this is a Scrapy downloader-middleware hook --
    confirm against the enclosing class.

    Steps, in order:

    1. Pop ``meta['proxy_action']`` and act on it:
       ``'disable'`` sets the current proxy's status to ``'D'`` in the pool
       and removes ``meta['proxy']`` so that a new one is picked below;
       ``'release'`` hands the current proxy back to the pool and drops the
       request.
    2. If ``meta['proxy']`` is (still) present, only (re)apply its
       credentials and leave the request otherwise untouched.
    3. Otherwise fetch a proxy from the pool according to ``self.mode``,
       store it in ``meta['proxy']`` and return a copy of the request (or
       of ``meta['ss_request']`` when one is present) carrying the updated
       meta.

    Args:
        request: The outgoing request; its ``meta`` and ``headers`` are
            modified in place.
        spider: Unused here.

    Returns:
        ``None`` when the request already had a proxy, otherwise a new
        request built with ``replace(meta=request.meta, dont_filter=True)``.

    Raises:
        IgnoreRequest: When ``proxy_action`` is ``'release'``.
        KeyError: When ``proxy_action`` is ``'disable'`` or ``'release'``
            but ``meta`` holds no ``'proxy'`` entry.
        ValueError: When ``self.mode`` is neither ``'random'`` nor
            ``'sequential'``.
    """

    def set_auth(request, proxy):
        # Only proxies that carry credentials get the header.
        if proxy.creds:
            request.headers['Proxy-Authorization'] = proxy.creds

    lg.debug('in process_request: {}, {}'.format(request, request.meta))

    # The action is consumed (popped) so it is not applied a second time.
    action = request.meta.pop('proxy_action', None)
    if action == 'disable':
        self.pp.set_status(self.map_proxy(request.meta['proxy']), 'D')
        del request.meta['proxy']  # Make it pick another proxy
    elif action == 'release':
        self.pp.release_proxy(self.map_proxy(request.meta['proxy']))
        raise IgnoreRequest

    # Don't overwrite with a random one (server-side state for IP)
    if 'proxy' in request.meta:
        set_auth(request, self.map_proxy(request.meta['proxy']))
        return None  # No fuss, we have a proxy already

    if self.mode == 'random':
        proxy = self.pp.get_proxy(True)
    elif self.mode == 'sequential':
        proxy = self.pp.get_proxy()
    else:
        # Previously this fell through and failed later with an opaque
        # UnboundLocalError on `proxy`; fail here with the actual cause.
        raise ValueError('Unsupported proxy mode: {!r}'.format(self.mode))

    request.meta['proxy'] = proxy.p
    set_auth(request, proxy)
    lg.debug('Using proxy '+proxy.p)

    # Start setup_session anew wherever we are, fresh or recurring
    session_request = request.meta.get('ss_request')
    if session_request:
        # Store original request to use after the session is setup; an
        # already stored original is kept as is.
        request.meta.setdefault('original_request', request)
    else:
        session_request = request

    # The returned copy shares this request's meta (including the proxy just
    # chosen). NOTE(review): dont_filter=True presumably keeps the duplicate
    # filter from discarding the re-issued request -- confirm.
    return session_request.replace(meta=request.meta, dont_filter=True)
评论列表
文章目录