import requests

def crawl(self, job: Job):
    try:
        # Fetch the page and hand the HTML to the link extractor.
        r = requests.get(job.url, timeout=10)  # without a timeout, ConnectTimeout can never be raised
        link_parser = LinkParser(job)
        link_parser.parse(r.text)
        links = link_parser.get_links()
    except ValueError as e:
        print("Couldn't parse url:", job.url, e)
    except (requests.ConnectionError, requests.ConnectTimeout, requests.exceptions.SSLError) as e:
        # e.strerror is usually None on requests exceptions; print the exception itself.
        print("Couldn't fetch url:", job.url, e)
    else:
        # Recurse into each discovered link until the configured depth is reached.
        while not links.empty():
            job = links.get()
            self.print_status(job, links)
            if job.priority < self._depth:
                self.crawl(job)
            links.task_done()
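The `empty()`/`get()`/`task_done()` trio suggests that `get_links()` returns a `queue.Queue`, most likely a `queue.PriorityQueue` given that `job.priority` drives the depth check. Here is a minimal sketch of that assumed interface; the `Job` dataclass and its fields are hypothetical, inferred from how `crawl` uses them rather than confirmed by the source:

```python
import queue
from dataclasses import dataclass, field

@dataclass(order=True)
class Job:
    priority: int                    # assumed to double as crawl depth
    url: str = field(compare=False)  # excluded from priority ordering

links: "queue.PriorityQueue[Job]" = queue.PriorityQueue()
links.put(Job(priority=1, url="https://example.com/a"))

# Same drain pattern as in crawl(): lowest-priority (shallowest) jobs first.
while not links.empty():
    job = links.get()
    print(job.priority, job.url)
    links.task_done()
```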