def download_articles(name, categories, subset):
    """Fetch one subset of the 20 Newsgroups corpus and save it as JSON.

    The documents are retrieved with ``fetch_20newsgroups`` and written to
    ``../data/20ng/<name>/<subset>.json``.  The saved object is a dict keyed
    by the document's position as a string ("0", "1", ...), where each value
    is ``{'text': <document text>, 'label': <target name>}``.

    Args:
        name: Directory name created under ``../data/20ng`` for the output.
        categories: Forwarded unchanged as the ``categories`` argument of
            ``fetch_20newsgroups``.
        subset: Forwarded as the ``subset`` argument of
            ``fetch_20newsgroups``; also used as the output file's base name.

    Returns:
        None.  The function prints progress messages and writes one file.
    """
    print("Downloading articles")
    # remove=() is passed explicitly, i.e. no parts of the posts are
    # requested to be stripped.
    newsgroups_data = fetch_20newsgroups(subset=subset, categories=categories, remove=())

    # Look up the label list once rather than on every iteration.
    target_names = newsgroups_data['target_names']
    documents = zip(newsgroups_data['data'], newsgroups_data['target'])
    data = {
        str(index): {'text': text, 'label': target_names[target]}
        for index, (text, target) in enumerate(documents)
    }
    print(len(data))

    raw_data_dir = os.path.join('..', 'data', '20ng', name)
    print("Saving to", raw_data_dir)
    # NOTE(review): `fh` is defined outside this block; presumably the
    # project's file-handling helper module -- confirm that `makedirs`
    # tolerates an already-existing directory.
    fh.makedirs(raw_data_dir)
    fh.write_to_json(data, os.path.join(raw_data_dir, subset + '.json'))
# 评论列表 (comment list)
# 文章目录 (article table of contents)