def get_pages_in_category_tree(source, category, count, max_workers=32):
    """Collect pages from a category and its subcategories, level by level.

    Walks the category tree breadth-first starting at ``category``. Every
    category of the current level is fetched with ``get_category_members``
    through a worker pool; the pages found are accumulated and the
    subcategories that have not been visited yet become the next level.
    The walk stops once at least ``count`` pages are known or no unvisited
    categories remain.

    Args:
        source: Passed unchanged as the first argument of
            ``get_category_members``.
        category: Root category the walk starts from.
        count: Number of pages to aim for. It is only checked between
            levels, so the result may hold more than ``count`` pages, or
            fewer when the tree is exhausted first.
        max_workers: Upper bound on the number of pool workers used per
            level. ``None`` means one worker per category in the level.

    Returns:
        A list of the distinct pages collected, in no particular order.
    """
    # Function-scope import: the file's import block is not visible here.
    from functools import partial

    # A process pool has to pickle the callable it ships to its workers and
    # a lambda cannot be pickled; a partial over the module-level function
    # can. NOTE(review): with a process pool, ``source`` and the category
    # values must be picklable as well -- confirm against the caller.
    fetch_members = partial(get_category_members, source)

    pages = set()
    seen_categories = set()
    current_categories = {category}
    while len(pages) < count:
        log.debug(len(pages))
        if not current_categories:
            break

        # Never start more workers than there are categories to fetch, and
        # keep wide levels from spawning one worker per category.
        workers = len(current_categories)
        if max_workers is not None:
            workers = max(1, min(workers, max_workers))

        next_categories = set()
        with multiprocessing.Pool(processes=workers) as pool:
            results = pool.map(fetch_members, current_categories)
        for result in results:
            next_categories.update(result['subcats'])
            pages.update(result['pages'])

        # Drop already visited categories so cycles in the category graph
        # cannot make the walk loop forever.
        seen_categories.update(current_categories)
        current_categories = next_categories - seen_categories
    log.debug(len(pages))
    return list(pages)
评论列表
文章目录