import asyncio

import aiohttp


async def fetch(url, retry=0):
    # Pick a fresh random proxy and user agent for each attempt.
    proxy = 'http://{}'.format(Proxy.get_random()['address'])
    headers = {'user-agent': get_user_agent()}
    # ProxyConnector is the pre-1.0 aiohttp API; newer releases instead take
    # a proxy= argument on each request.
    conn = aiohttp.ProxyConnector(proxy=proxy)
    js_url = gen_js_url(url)
    try:
        with aiohttp.ClientSession(connector=conn) as session:
            with aiohttp.Timeout(TIMEOUT):
                # Fetch the article HTML, then its companion JS data.
                async with session.get(url, headers=headers) as resp:
                    html_text = await resp.text()
                async with session.get(js_url, headers=headers) as resp:
                    js_data = await resp.json()
    except Exception:
        # On any failure, sleep briefly and retry with a new proxy,
        # giving up after 5 attempts.
        retry += 1
        if retry > 5:
            raise CrawlerError()
        await asyncio.sleep(1)
        return await fetch(url, retry=retry)
    return html_text, js_data
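Since fetch is a coroutine, it has to be driven by an event loop. A minimal usage sketch, assuming TIMEOUT, Proxy, get_user_agent, gen_js_url, and CrawlerError are defined as above; the URL is a hypothetical placeholder:

import asyncio

# Run a single fetch to completion and unpack both results.
loop = asyncio.get_event_loop()
html_text, js_data = loop.run_until_complete(fetch('http://example.com/article/1'))

Retrying by recursing into fetch keeps the code short; each failed attempt leaves a frame on the stack, but with the cap of 5 retries that is harmless here.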