def multi_core_scrape(num_pages, db_coll):
    '''
    Map the API scrape across number of processors - 1 for performance boost.

    INPUT:
        num_pages: int, number of pages to scrape
        db_coll: pymongo collection object, collection to add documents to
    OUTPUT:
        None, records inserted into MongoDB
    '''
    # Leave one core free for the OS/main process, but never ask for a
    # zero-worker pool (cpu_count() == 1 would previously raise ValueError).
    cpus = max(1, cpu_count() - 1)
    pool = Pool(processes=cpus)
    try:
        pages = range(1, num_pages + 1)
        # NOTE(review): assumes scrape_api_page(page) returns a list of
        # documents suitable for insert_many — confirm against its definition.
        employers = pool.map(scrape_api_page, pages)
        pool.close()
    except Exception:
        # On failure, kill workers immediately instead of leaking processes.
        pool.terminate()
        raise
    finally:
        pool.join()
    # print(...) with a single argument behaves identically on Python 2 and 3.
    print('Inserting Employer Records into MongoDB . . .')
    pbar = ProgressBar()
    for page in pbar(employers):
        db_coll.insert_many(page)
# Source: excerpt from glassdoor_search.py (scraped listing; page metadata removed)