def craw(self, lock, count):
    """Worker loop: consume crawl tasks from ``self.task`` until shutdown.

    For each task, downloads the task's URL, parses the page, feeds newly
    discovered URLs back into the shared task queue, and collects/outputs
    the parsed data. A ``None`` item on the queue is the poison pill that
    tells this worker to stop.

    Args:
        lock: unused in this body; kept for interface compatibility with
            existing callers (presumably a multiprocessing lock).
        count: starting value of the per-worker progress counter used only
            in the log line; incremented once per crawled page.
    """
    while True:
        next_task = self.task.get()
        if next_task is None:
            # Poison pill: acknowledge it and stop this worker.
            # (The previous `continue` consumed the sentinel but left the
            # worker blocked on `get()` forever; `break` is the standard
            # queue-worker shutdown pattern.)
            self.task.task_done()
            break
        # NOTE(review): `.a` presumably holds the task's URL — confirm
        # against the Task class definition elsewhere in this file.
        new_url = next_task.a
        print("%s craw %d : %s" % (multiprocessing.current_process().name, count, new_url))
        new_html = self.downloader.download(new_url)
        new_urls, new_data = self.parser.parse(new_url, new_html)
        # Feed newly discovered URLs back into the shared queue so any
        # worker can pick them up.
        for url in new_urls:
            self.task.put(Task(url))
        self.outputer.collect_data(new_data)
        self.outputer.output_html()
        self.task.task_done()
        count += 1
# 评论列表 / 文章目录 — stray page-navigation text ("Comment list" /
# "Article table of contents") left over from a web-page scrape; commented
# out so the module remains syntactically valid Python.