def fetch(src, save, db, collection, p, f):
    """Worker loop: take URLs off the pending queue and crawl them.

    For every URL that is not already present in the finished queue, a
    recipe is built with ``src(url)``, optionally saved and/or stored, the
    URL is recorded on the finished queue, and every other recipe link the
    recipe reports is put on the pending queue for any worker to process.

    The loop never returns on its own. It only ends when an exception
    escapes: an error raised by ``p.get()``, or anything other than
    ``ValueError`` raised while a URL is being processed. In the latter
    case the URL is still marked done on ``p`` before the exception
    propagates.

    Args:
        src: callable taking a URL and returning a recipe object that
            provides ``save()``, ``store(db, collection)`` and
            ``getOtherRecipeLinks()``. A ``ValueError`` raised while a URL
            is processed is reported as a warning and the URL is skipped.
        save: when truthy, ``recipe.save()`` is called.
        db: passed together with ``collection`` to ``recipe.store``; the
            recipe is stored only when both are not ``None``.
        collection: see ``db``.
        p: pending queue; must provide ``get()``, ``put()`` and
            ``task_done()``.
        f: finished queue; must provide ``put()`` and a ``queue``
            attribute that supports the ``in`` operator.
    """
    while True:
        url = p.get()

        # NOTE(review): this inspects the finished queue's underlying
        # container directly; no locking is visible here, so two workers
        # could presumably still fetch the same URL -- confirm acceptable.
        if url in f.queue:
            p.task_done()
            continue

        try:
            recipe = src(url)
            if save:
                recipe.save()
            if db is not None and collection is not None:
                recipe.store(db, collection)
            f.put(url)
            # Explicit loop instead of map(): the puts are side effects and
            # must run even where map() is lazy.
            for link in recipe.getOtherRecipeLinks():
                if link != url:
                    p.put(link)
        except ValueError:
            # Single pre-formatted argument prints the same text whether
            # print is a statement or a function.
            print('[warning] could not fetch: %s' % (url,))
        finally:
            # Mark the item done only after its links were queued, and do
            # so even on unexpected errors so p.join() cannot wait forever
            # on a worker that died.
            p.task_done()

        if PAUSE_CRAWLER:
            # pause a random interval between PAUSE_TIME_RANGE seconds
            # before continuing
            sleep(randint(PAUSE_TIME_RANGE[0], PAUSE_TIME_RANGE[1]))
评论列表
文章目录