def getherproxy_req(self, block=True):
    """Fetch the elite-anonymity proxy list from gatherproxy.com.

    Each page of results is handed to ``self._get_proxy`` for parsing
    (side effect); nothing useful is returned on success.

    Args:
        block: When True (the default, matching the previously hard-coded
            behavior), pages are fetched sequentially with ``requests``.
            When False, pages are fetched concurrently through Twisted's
            cooperative task scheduler (starts and stops the reactor).

    Returns:
        ``[]`` if the blocking path fails on a request (original
        behavior: abort on first failure); otherwise ``None``.
    """
    url = 'http://gatherproxy.com/proxylist/anonymity/?t=Elite'
    if not block:
        # --- Non-blocking path: concurrent downloads via Twisted ---
        settings = Settings()

        @defer.inlineCallbacks
        def getpage(request, page):
            # Download one page; parse the body only on HTTP 200.
            try:
                print("Request {},pagenumber:{}".format(request, page))
                response = yield HTTP11DownloadHandler(settings).download_request(request, spider=None)
                if response.status == 200:
                    self._get_proxy(response.body.decode(), country=self.country)
            except Exception as e:
                # Best-effort scrape: log the failure and let the other
                # page downloads continue.
                print(e)
                print("[!] Failed: request {} of page:{}".format(request, page))

        def iter_page():
            # Lazily create one download Deferred per page, run at most
            # self.concurrent of them cooperatively, and stop the reactor
            # once every download has finished or failed.
            work = (
                getpage(FormRequest(url=url,
                                    headers=self.headers,
                                    formdata={'Type': 'elite', 'PageIdx': str(page), 'Uptime': '0'},
                                    meta={'download_timeout': 60}), page=page)
                for page in range(1, self.maxpage + 1)
            )
            coop = task.Cooperator()
            join = defer.DeferredList(coop.coiterate(work) for i in range(self.concurrent))
            join.addBoth(lambda _: reactor.stop())

        iter_page()
        reactor.run()
    else:
        # --- Blocking path: one sequential POST per page ---
        for pagenum in range(1, self.maxpage + 1):
            data = {'Type': 'elite', 'PageIdx': str(pagenum), 'Uptime': '0'}
            headers = copy.copy(self.headers)
            try:
                # Keep the try body minimal: only the network call can fail.
                r = requests.post(url, headers=headers, data=data)
            except Exception as e:
                # Original behavior preserved: abort the whole scrape on
                # the first failed request and return an empty list.
                print(str(e))
                print('[!] Failed: %s' % url)
                return []
            self._get_proxy(r.text, country=self.country)
# --- Page-scrape residue (not code), translated from the source blog page ---
# crawl-proxy-nonblock.py source listing — python
# views: 21 · favorites: 0 · likes: 0 · comments: 0
# (comment list / table of contents followed on the original page)