def newsgroups_class_distrib(n=2000):
    """Plot the class distribution of the 20 newsgroups train and test sets.

    Fetches both subsets with scikit-learn's ``fetch_20newsgroups`` and
    draws one normalized histogram of the target labels per subset, each
    in its own figure, then shows them with ``plt.show()``.

    Expects ``plt`` (``matplotlib.pyplot``) to be available at module level.

    Args:
        n: Number of leading samples from each subset to include in the
            histograms. ``None`` uses every sample. Defaults to 2000.
    """
    from sklearn.datasets import fetch_20newsgroups

    removed = ('headers', 'footers', 'quotes')
    ngroup_test = fetch_20newsgroups(subset='test', remove=removed, categories=None)
    ngroup_train = fetch_20newsgroups(subset='train', remove=removed, categories=None)

    # Only the targets are plotted; the document texts are not needed here.
    subsets = (
        ("train groups", ngroup_train.target[:n]),
        ("test groups", ngroup_test.target[:n]),
    )
    for title, groups in subsets:
        plt.figure()
        # `density` replaces the `normed` keyword, which was removed from
        # hist() in Matplotlib 3.1 and raises on current versions.
        plt.hist(groups, 20, density=True, range=(0, 19))
        plt.title(title)
    plt.show()
评论列表
文章目录