async def fetch(self, url, max_redirect):
    """Fetch one URL, record a statistic for it, and queue follow-up URLs.

    The URL is requested with automatic redirects disabled, retrying on
    ``aiohttp.ClientError`` until ``self.max_tries`` attempts have been made.

    * If every attempt fails, a ``FetchStatistic`` carrying the last
      exception is recorded and the method returns.
    * If the response is a redirect (as decided by ``is_redirect``), the
      ``location`` header is resolved against ``url``, a statistic is
      recorded, and the target is added via ``self.add_url`` unless it has
      already been seen or the redirect budget is exhausted.
    * Otherwise the response is handed to ``self.parse_links``; its
      statistic is recorded and every link not yet in ``self.seen_urls``
      is put on ``self.q``.

    Args:
        url: The URL to fetch.
        max_redirect: How many more redirects may be followed from this
            URL; a redirect target is only queued while this is > 0.

    Returns:
        None. Results are reported through ``self.record_statistic``,
        ``self.add_url``, ``self.q`` and ``self.seen_urls``.
    """
    tries = 0
    exception = None
    while tries < self.max_tries:
        try:
            response = await self.session.get(
                url, allow_redirects=False)
            break
        except aiohttp.ClientError as client_error:
            # Remember the most recent failure so it can be reported if
            # all attempts are used up.
            exception = client_error
        tries += 1
    else:
        # The loop ended without ``break``: no attempt produced a response.
        self.record_statistic(FetchStatistic(url=url,
                                             next_url=None,
                                             status=None,
                                             exception=exception,
                                             size=0,
                                             content_type=None,
                                             encoding=None,
                                             num_urls=0,
                                             num_new_urls=0))
        return

    try:
        if is_redirect(response):
            location = response.headers['location']
            # The header may be relative; resolve it against the request URL.
            next_url = urllib.parse.urljoin(url, location)
            self.record_statistic(FetchStatistic(url=url,
                                                 next_url=next_url,
                                                 status=response.status,
                                                 exception=None,
                                                 size=0,
                                                 content_type=None,
                                                 encoding=None,
                                                 num_urls=0,
                                                 num_new_urls=0))
            if next_url in self.seen_urls:
                return
            if max_redirect > 0:
                self.add_url(next_url, max_redirect - 1)
            else:
                # print() does not interpolate %-placeholders itself, so the
                # message is formatted explicitly before being printed.
                print('redirect limit reached for %r from %r'
                      % (next_url, url))
        else:
            stat, links = await self.parse_links(response)
            self.record_statistic(stat)
            # Newly discovered links start with the full redirect budget.
            for link in links.difference(self.seen_urls):
                self.q.put_nowait((link, self.max_redirect))
            self.seen_urls.update(links)
    finally:
        # Always hand the connection back, including on the early return
        # above and when parsing raises.
        # NOTE(review): release() is awaited as in the original code;
        # confirm the installed aiohttp version still returns an awaitable.
        await response.release()
评论列表
文章目录