async def process(self, task):
    """Download ``task.url``, run the spider on the response, and report the outcome.

    The request goes through the request pipeline and the HTTP "before"
    middleware, is downloaded, and the response goes through the HTTP
    "after" middleware and the response pipeline. A response whose status
    is in ``constants.HTTP_FAILED`` is not parsed; that status becomes the
    result status. Otherwise the spider produces follow-up tasks and items,
    each passed through its own pipeline when non-empty.

    Args:
        task: The unit of work; only ``task.url`` is read here, and the
            task object itself is forwarded to the spider and the result.

    Returns:
        The ``Result`` that was handed to ``self.manager.process``. Its
        ``status`` is ``0`` when nothing went wrong, the HTTP status for
        a response listed in ``constants.HTTP_FAILED``,
        ``constants.status.RETRIAL`` after an ``aiohttp.ClientError``, or
        ``constants.status.FAILED`` after any other exception.

    Note:
        Exceptions raised inside the fetch/parse section are logged and
        converted into a status; they are not re-raised.
    """
    # Local import keeps this block self-contained; it is only needed to
    # close the session portably in the ClientError handler.
    import inspect

    status, tasks, items = 0, set(), set()

    # ``async with`` replaces the legacy ``with (await semaphore)`` form,
    # which was deprecated in Python 3.7 and removed in 3.9.
    # Assumes self.manager.semaphore is an asyncio synchronization
    # primitive -- TODO confirm.
    async with self.manager.semaphore:
        try:
            request = Request(url=task.url)
            request = await self.pipeline.requests.process(request)
            request = await self.middleware.http.before(request)
            response = await self.downloader.process(request)
            response = await self.middleware.http.after(response)
            response = await self.pipeline.responses.process(response)

            if response.status in constants.HTTP_FAILED:
                # Failed HTTP status: skip parsing, report the status as-is.
                status = response.status
            else:
                tasks, items = await self.spider.process(task=task, response=response)
                if tasks:
                    tasks = await self.pipeline.tasks.process(tasks)
                if items:
                    items = await self.pipeline.items.process(items)

            # NOTE(review): the source had lost its indentation; this call is
            # assumed to run for every response that was received, not only
            # for successfully parsed ones -- confirm against the original.
            await self.pipeline.stats.process(stats=await self.stats())
        except aiohttp.ClientError as e:
            log.exception(e)
            status = constants.status.RETRIAL
            # close() may be a coroutine (as in aiohttp 3.x ClientSession);
            # calling it without awaiting would leave the session open.
            # Awaiting only when the result is awaitable keeps a synchronous
            # close() working too.
            closing = self.session.close()
            if inspect.isawaitable(closing):
                await closing
        except Exception as e:
            # Deliberate catch-all boundary: any failure while handling one
            # task is logged and reported as FAILED instead of propagating.
            log.exception(e)
            status = constants.status.FAILED

    # NOTE(review): assumed to run after the semaphore is released (nesting
    # was reconstructed) -- confirm against the original.
    result = Result(status=status, task=task, tasks=tasks, items=items)
    await self.manager.process(result=result)
    return result
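# 评论列表 ("comment list") -- web-page navigation text captured with the snippet; not code.
# 文章目录 ("article table of contents") -- web-page navigation text captured with the snippet; not code.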