def scrape(folder=None):
    """
    Collect the registry files for every extracted version of every channel.

    For each channel in CHANNELS the tags are loaded with load_tags(),
    turned into version entries by extract_tag_data(), and for each entry
    download_files() is called for that entry's node. Progress is printed
    to stdout.

    :param folder: working directory. It is handed to load_error_cache(),
                   save_error_cache() and download_files(), and the
                   requests_cache cache named 'probe_scraper_cache' is
                   installed inside it. When None, a new temporary
                   directory is created with tempfile.mkdtemp().
    :return: a defaultdict(dict) in the format:
    {
      channel_name: {
        node_id: {
          channel: channel_name,
          version: string,
          registries: <value returned by download_files()>
        },
        ...
      },
      ...
    }
    NOTE(review): registries is presumably of the form
    {histogram: [path, ...], event: [path, ...], scalar: [path, ...]};
    confirm against download_files().
    """
    if folder is None:
        folder = tempfile.mkdtemp()

    error_cache = load_error_cache(folder)
    requests_cache.install_cache(os.path.join(folder, 'probe_scraper_cache'))
    results = defaultdict(dict)

    # Iterating the mapping directly yields its keys on Python 2 and 3,
    # unlike the Python 2-only iterkeys().
    for channel in CHANNELS:
        tags = load_tags(channel)
        versions = extract_tag_data(tags, channel)
        save_error_cache(folder, error_cache)

        # print(...) with a single parenthesised string produces the same
        # output under the Python 2 print statement and the Python 3
        # print function.
        print("\n" + channel + " - extracted version data:")
        for v in versions:
            print(" " + str(v))

        print("\n" + channel + " - loading files:")
        for v in versions:
            print(" from: " + str(v))
            node = v['node']
            files = download_files(channel, node, folder, error_cache)
            results[channel][node] = {
                'channel': channel,
                'version': v['version'],
                'registries': files,
            }
            # Persist the error cache after every download.
            save_error_cache(folder, error_cache)

    return results
评论列表
文章目录