crawl-proxy-nonblock.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:tianyancha_project 作者: sunbopython 项目源码 文件源码
def getherproxy_req(self):
        """get proxy from gatherproxy.com

        Scrapes up to ``self.maxpage`` pages of elite proxies from
        gatherproxy.com and hands each page's body to ``self._get_proxy``
        for parsing (results are accumulated as a side effect, presumably
        on an attribute set by ``_get_proxy`` -- can't tell from here).

        NOTE(review): ``block`` is hard-coded to True, so only the blocking
        ``requests`` branch ever runs; the Twisted non-blocking branch is
        unreachable dead code kept for reference.

        Returns an empty list if a blocking request raises; otherwise
        returns None implicitly (inconsistent -- callers should not rely
        on the return value).
        """
        # Hard-coded switch: False would select the Twisted non-blocking
        # path below; True selects the simple sequential requests path.
        block = True

        if not block:
            # method1-nonblock: fetch all pages concurrently via Twisted.
            # (Dead code while block is True.)
            url = 'http://gatherproxy.com/proxylist/anonymity/?t=Elite'
            settings = Settings()
            @defer.inlineCallbacks
            def getpage(request,page):
                # Download a single page; on HTTP 200 parse proxies out of
                # the decoded body.
                try:
                    print("Request {},pagenumber:{}".format(request,page))
                    response = yield HTTP11DownloadHandler(settings).download_request(request,spider=None)
                    if response.status==200:
                        self._get_proxy(response.body.decode(),country=self.country)
                except Exception as e:
                    # NOTE(review): broad best-effort catch -- logs and skips
                    # the failed page so the other downloads continue.
                    print(e)
                    print("[!] Failed: request {} of page:{}".format(request,page))
                    pass##
            def iter_page():
                # Lazily build one POST request per page (1..maxpage); the
                # generator is consumed cooperatively below.
                work =( 
                            getpage(FormRequest(url=url,
                                                headers=self.headers,
                                                formdata={'Type':'elite','PageIdx':str(page),'Uptime':'0'},
                                                meta={'download_timeout':60}),page=page)  for page in range(1,self.maxpage+1)
                        )
                # Run at most self.concurrent downloads at once: each
                # coiterate() call pulls tasks from the shared generator,
                # and the DeferredList fires when all of them are drained,
                # at which point the reactor is stopped.
                coop = task.Cooperator()
                join = defer.DeferredList(coop.coiterate(work) for i in range(self.concurrent))
                join.addBoth(lambda _: reactor.stop())
            iter_page()
            reactor.run()
        else:
            # method 2- block: fetch pages sequentially with requests.
            url = 'http://gatherproxy.com/proxylist/anonymity/?t=Elite'
            for pagenum in range(1,self.maxpage+1):
                try:
                    data = {'Type':'elite','PageIdx':str(pagenum),'Uptime':'0'}
                    # Copy so any per-request header mutation cannot leak
                    # back into the shared self.headers dict.
                    headers = copy.copy(self.headers)
                    r = requests.post(url, headers=headers, data=data) 
                except Exception as e:
                    # First failure aborts the whole scrape and reports an
                    # empty proxy list (pages already parsed are kept via
                    # the _get_proxy side effect).
                    print(str(e))
                    print('[!] Failed: %s' % url)
                    gatherproxy_list = []
                    return gatherproxy_list
                self._get_proxy(r.text,country=self.country)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号