save_article_content.py 文件源码-python代码片段

save_article_content.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

项目：web_develop 作者: dongweiming 项目源码文件源码

def fetch(url, retry=0):
    proxy = 'http://{}'.format(Proxy.get_random()['address'])
    headers = {'user-agent': get_user_agent()}
    conn = aiohttp.ProxyConnector(proxy=proxy)

    js_url = gen_js_url(url)

    try:
        with aiohttp.ClientSession(connector=conn) as session:
            with aiohttp.Timeout(TIMEOUT):
                async with session.get(url, headers=headers) as resp:
                    html_text = await resp.text()

                async with session.get(js_url, headers=headers) as resp:
                    js_data = await resp.json()
    except:
        retry += 1
        if retry > 5:
            raise CrawlerError()
        await asyncio.sleep(1)
        return await fetch(url, retry=retry)
    return html_text, js_data