from __future__ import print_function  # keep print() working under Python 2

import re

import eventlet
from eventlet.green import urllib2  # eventlet's cooperative (green) urllib2

# Match absolute http(s) urls; a simple pattern for illustration --
# eventlet's own example uses a broader one.
url_regex = re.compile(r'https?://[^\s\'"<>]+')


def fetch(url, seen, pool):
    """Fetch a url, stick any found urls into the seen set, and
    dispatch any new ones to the pool."""
print("fetching", url)
data = ''
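    # a 5-second timeout; passing False makes it expire silently instead of
    # raising, so a slow fetch simply leaves data as the empty string above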
with eventlet.Timeout(5, False):
data = urllib2.urlopen(url).read()
for url_match in url_regex.finditer(data):
new_url = url_match.group(0)
# only send requests to eventlet.net so as not to destroy the internet
if new_url not in seen and 'eventlet.net' in new_url:
seen.add(new_url)
# while this seems stack-recursive, it's actually not:
# spawned greenthreads start their own stacks
pool.spawn_n(fetch, new_url, seen, pool)
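
The function above only schedules work; it needs a driver that owns the shared seen set and the pool. A minimal sketch follows, assuming eventlet's GreenPool with its default size; the crawl name and the starting URL are illustrative:

def crawl(start_url):
    """Recursively crawl starting from *start_url*; return the set of
    urls that were found."""
    pool = eventlet.GreenPool()
    seen = set()
    fetch(start_url, seen, pool)
    pool.waitall()  # block until every spawned fetch greenthread finishes
    return seen

if __name__ == '__main__':
    for found in sorted(crawl('http://eventlet.net')):
        print(found)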
Source: recursive_crawler.py