def test_grid_search_iid():
# test the iid parameter
# noise-free simple 2d-data
X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
cluster_std=0.1, shuffle=False, n_samples=80)
# split dataset into two folds that are not iid
# first one contains data of all 4 blobs, second only from two.
mask = np.ones(X.shape[0], dtype=np.bool)
mask[np.where(y == 1)[0][::2]] = 0
mask[np.where(y == 2)[0][::2]] = 0
# this leads to perfect classification on one fold and a score of 1/3 on
# the other
svm = SVC(kernel='linear')
# create "cv" for splits
cv = [[mask, ~mask], [~mask, mask]]
# once with iid=True (default)
grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
grid_search.fit(X, y)
first = grid_search.grid_scores_[0]
assert_equal(first.parameters['C'], 1)
assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
# for first split, 1/4 of dataset is in test, for second 3/4.
# take weighted average
assert_almost_equal(first.mean_validation_score,
1 * 1. / 4. + 1. / 3. * 3. / 4.)
# once with iid=False
grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
iid=False)
grid_search.fit(X, y)
first = grid_search.grid_scores_[0]
assert_equal(first.parameters['C'], 1)
# scores are the same as above
assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
# averaged score is just mean of scores
assert_almost_equal(first.mean_validation_score,
np.mean(first.cv_validation_scores))
评论列表
文章目录