def test_stratified_kfold_ratios():
# Check that stratified kfold preserves label ratios in individual splits
# Repeat with shuffling turned off and on
n_samples = 1000
labels = np.array([4] * int(0.10 * n_samples) +
[0] * int(0.89 * n_samples) +
[1] * int(0.01 * n_samples))
for shuffle in [False, True]:
for train, test in cval.StratifiedKFold(labels, 5, shuffle=shuffle):
assert_almost_equal(np.sum(labels[train] == 4) / len(train), 0.10,
2)
assert_almost_equal(np.sum(labels[train] == 0) / len(train), 0.89,
2)
assert_almost_equal(np.sum(labels[train] == 1) / len(train), 0.01,
2)
assert_almost_equal(np.sum(labels[test] == 4) / len(test), 0.10, 2)
assert_almost_equal(np.sum(labels[test] == 0) / len(test), 0.89, 2)
assert_almost_equal(np.sum(labels[test] == 1) / len(test), 0.01, 2)
评论列表
文章目录