spider.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:freehp 作者: jadbin 项目源码 文件源码
async def _update_proxy(self, urls):
        """Fetch every URL in ``urls`` and forward the proxies found on each page.

        For each URL the page is requested with ``self._headers`` under a
        ``self._timeout`` limit, making at most three attempts. Once a request
        succeeds, the final response URL and the raw body are handed to
        ``self._proxy_finder.find_proxy``; a non-empty result is unpacked into
        ``self._callback``, which is awaited. After each URL (whether it
        succeeded or exhausted its attempts) the coroutine sleeps for
        ``self._sleep_time``.

        Must be a coroutine function: the body relies on ``async with`` and
        ``await``, which are a syntax error inside a plain ``def``.

        :param urls: iterable of page URLs to scan.
        :return: ``None``.
        :raises Exception: errors raised by ``find_proxy`` or the callback are
            not caught here and propagate to the caller; only failures while
            fetching the page are logged and retried.
        """
        # NOTE(review): the explicit ``loop=`` arguments below are kept as in
        # the original; newer asyncio / aiohttp / async_timeout releases
        # deprecate or reject them -- confirm the targeted versions.
        max_attempts = 3
        for u in urls:
            for _ in range(max_attempts):
                try:
                    async with aiohttp.ClientSession(loop=self._loop) as session:
                        with async_timeout.timeout(self._timeout, loop=self._loop):
                            async with session.request("GET", u, headers=self._headers) as resp:
                                # resp.url is the URL actually answered; it is
                                # stringified before being passed on.
                                url = str(resp.url)
                                body = await resp.read()
                except Exception as e:
                    # Best-effort fetch: log the failure and try again (no
                    # delay between attempts, as in the original).
                    log.info("{} error occurred when update proxy on url={}: {}".format(type(e), u, e))
                else:
                    addr_list = self._proxy_finder.find_proxy(url, body)
                    log.debug("Find {} proxies on the page '{}'".format(len(addr_list), u))
                    if addr_list:
                        await self._callback(*addr_list)
                    # Page handled successfully: stop retrying this URL.
                    break
            # Pause between URLs regardless of the outcome.
            await asyncio.sleep(self._sleep_time, loop=self._loop)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号