def test_stratified_kfold_ratios():
# Check that stratified kfold preserves class ratios in individual splits
# Repeat with shuffling turned off and on
n_samples = 1000
X = np.ones(n_samples)
y = np.array([4] * int(0.10 * n_samples) +
[0] * int(0.89 * n_samples) +
[1] * int(0.01 * n_samples))
for shuffle in (False, True):
for train, test in StratifiedKFold(5, shuffle=shuffle).split(X, y):
assert_almost_equal(np.sum(y[train] == 4) / len(train), 0.10, 2)
assert_almost_equal(np.sum(y[train] == 0) / len(train), 0.89, 2)
assert_almost_equal(np.sum(y[train] == 1) / len(train), 0.01, 2)
assert_almost_equal(np.sum(y[test] == 4) / len(test), 0.10, 2)
assert_almost_equal(np.sum(y[test] == 0) / len(test), 0.89, 2)
assert_almost_equal(np.sum(y[test] == 1) / len(test), 0.01, 2)
评论列表
文章目录