test_search.py 文件源码

python
阅读 36 收藏 0 点赞 0 评论 0

项目:Parallel-SGD 作者: angadgill 项目源码 文件源码
def test_grid_search_iid():
    """Check how GridSearchCV averages fold scores with and without ``iid``.

    Two deliberately unbalanced, non-iid folds are built by hand. With the
    default setting the mean validation score is expected to be the average
    of the fold scores weighted by test-fold size; with ``iid=False`` it is
    expected to be the plain mean of the fold scores.
    """
    # NOTE(review): relies on the legacy ``iid=`` parameter and
    # ``grid_scores_`` attribute of GridSearchCV -- presumably matches the
    # scikit-learn version this project targets; confirm before upgrading.

    # noise-free simple 2d-data
    X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
                      cluster_std=0.1, shuffle=False, n_samples=80)
    # split dataset into two folds that are not iid
    # first one contains data of all 4 blobs, second only from two.
    # Use the builtin ``bool`` as dtype: the ``np.bool`` alias was deprecated
    # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    mask = np.ones(X.shape[0], dtype=bool)
    # move every other sample of blobs 1 and 2 into the complementary fold
    mask[np.where(y == 1)[0][::2]] = 0
    mask[np.where(y == 2)[0][::2]] = 0
    # this leads to perfect classification on one fold and a score of 1/3 on
    # the other
    svm = SVC(kernel='linear')
    # create "cv" for splits: each entry is a (train mask, test mask) pair,
    # the second split swaps the roles of the two folds
    cv = [[mask, ~mask], [~mask, mask]]

    # once with iid=True (default)
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # for first split, 1/4 of dataset is in test, for second 3/4.
    # take weighted average
    assert_almost_equal(first.mean_validation_score,
                        1 * 1. / 4. + 1. / 3. * 3. / 4.)

    # once with iid=False
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
                               iid=False)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    # scores are the same as above
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # averaged score is just mean of scores
    assert_almost_equal(first.mean_validation_score,
                        np.mean(first.cv_validation_scores))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号