def proxy_checker(self):
    """Re-test every candidate proxy and keep the ones that answer reliably.

    Each proxy in ``self.proxy_list`` is used for ``self.checknum`` requests
    to a fixed test URL, with at most ``self.concurrent`` workers pulling
    requests at a time. A proxy whose share of HTTP 200 responses reaches
    ``self.checkthreshold`` is added to ``self.passproxy``. Once every
    request has finished, the passing proxies are printed and written to
    ``validProxy.txt`` (one per line) and the reactor is stopped.

    This method starts the Twisted reactor itself via ``reactor.run()``.
    """
    success = {}  # proxy -> number of HTTP 200 responses seen so far
    settings = Settings()

    @defer.inlineCallbacks
    def get_response(proxy, request):
        """Send one request through ``proxy`` and count a 200 response."""
        try:
            print("Request {} using proxy:{}".format(request, proxy))
            handler = HTTP11DownloadHandler(settings)
            response = yield handler.download_request(
                request=request, spider=None)
            if response.status == 200:
                success[proxy] = success.get(proxy, 0) + 1
                print("Successful(+{}/{}) ip:{}".format(
                    success[proxy], self.checknum, proxy))
                if success[proxy] / self.checknum >= self.checkthreshold:
                    self.passproxy.add(proxy)
        except Exception:
            # Best effort: a dead or misbehaving proxy is the expected
            # failure mode here. It simply earns no success count, and the
            # remaining checks must keep running.
            pass

    def output_better_proxy(_):
        """Print the passing proxies and write them to validProxy.txt."""
        with open('validProxy.txt', 'w') as f:
            for p in self.passproxy:
                print(p)
                f.write(p + '\n')

    def report_failure(failure):
        """Print the traceback of a failure raised while writing results."""
        print(failure.getTraceback())

    # `work` must stay a single lazy generator: every coiterate() worker
    # below pulls its next request from this same iterator, so requests are
    # only created as workers become free. (The original author notes that
    # a list did not give concurrent behaviour.)
    work = (
        get_response(
            proxy,
            Request(url='http://myip.dnsdynamic.org',
                    headers=self.headers,
                    meta={'proxy': "http://" + proxy,
                          'download_timeout': self.timeout}))
        for proxy in self.proxy_list
        for _ in range(self.checknum)
    )
    coop = task.Cooperator()
    # Build a real list: DeferredList is documented to take a list of
    # Deferreds, and a bare generator is not safe to pass on every release.
    workers = [coop.coiterate(work) for _ in range(self.concurrent)]
    join = defer.DeferredList(workers)
    join.addCallback(output_better_proxy)
    join.addErrback(report_failure)
    # addBoth, not addCallback: the reactor must stop even if writing the
    # result file failed, otherwise reactor.run() below never returns.
    join.addBoth(lambda _: reactor.stop())

    reactor.run()
crawl-proxy-nonblock.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录