Python make_blobs() examples from open-source projects
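
All snippets below are collected from open-source projects; each header gives the source file, project, and author. For orientation, here is a minimal standalone sketch of the scikit-learn make_blobs() API (parameter values chosen arbitrarily):

import numpy as np
from sklearn.datasets import make_blobs

# Draw 150 points from 3 isotropic Gaussian blobs in 2-D feature space.
X, y = make_blobs(n_samples=150, n_features=2, centers=3,
                  cluster_std=1.0, random_state=0)

print(X.shape)       # (150, 2) feature matrix
print(np.unique(y))  # [0 1 2]  integer cluster labels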

File: figure.classification.vs.regression.py · Project: microbiome-summer-school-2017 · Author: aldro61
def make_classification_example(axis, random_state):
    X, y = make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=2.7, random_state=random_state)

    axis.scatter(X[y == 0, 0], X[y == 0, 1], color="red", s=10, label="Disease")
    axis.scatter(X[y == 1, 0], X[y == 1, 1], color="blue", s=10, label="Healthy")

    clf = LinearSVC().fit(X, y)

    # get the separating hyperplane
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(-5, 7)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    # plot the separating line
    axis.plot(xx, yy, 'k-', label="Model")

    axis.tick_params(labelbottom=False, labelleft=False)
    axis.set_xlabel("Gene 1")
    axis.set_ylabel("Gene 2")
    axis.legend()
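
A hypothetical driver for this function (not part of the original script), assuming numpy, make_blobs, and LinearSVC are already imported:

import matplotlib.pyplot as plt

fig, axis = plt.subplots()
make_classification_example(axis, random_state=42)
plt.show()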
File: test_pairwise_distance.py · Project: pairwise_distance · Author: oliviaguest
def generate_data(N, seed=10):
    """ This generates some test data that we can use to test our pairwise-
    distance functions.

    Required arguments:
    N       -- The number of datapoints in the test data.

    Optional arguments:
    seed    -- The seed for NumPy's random module.
    """

    # Generate some data:
    np.random.seed(seed)
    n_samples1 = N * 3 // 4  # same as floor(3/4 * N)
    n_samples2 = N - n_samples1

    # Blob set 1
    centers1 = [[0., 0.],
                [1., 0.],
                [0.5, np.sqrt(0.75)]]
    cluster_std1 = [0.3] * len(centers1)
    data, _ = make_blobs(n_samples=n_samples1,
                         centers=centers1,
                         cluster_std=cluster_std1)

    # Make sure Blob 1 checks out

    # Blob set 2
    centers2 = [[0.5, np.sqrt(0.75)]]
    cluster_std2 = [0.3] * len(centers2)
    extra, _ = make_blobs(n_samples=n_samples2,
                          centers=centers2,
                          cluster_std=cluster_std2)

    return np.concatenate((data, extra), axis=0)
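
A quick sanity check (not from the original test file): with the 3:1 split above, generate_data(N) returns exactly N two-dimensional points:

data = generate_data(1000, seed=10)
assert data.shape == (1000, 2)  # 750 points from blob set 1 + 250 from blob set 2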
File: gaussian_mixture_model.py · Project: ML-From-Scratch · Author: eriklindernoren
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data
    clf = GaussianMixtureModel(k=3)
    y_pred = clf.predict(X)

    p = Plot()
    p.plot_in_2d(X, y_pred, title="GMM Clustering")
    p.plot_in_2d(X, y, title="Actual Clustering")
File: partitioning_around_medoids.py · Project: ML-From-Scratch · Author: eriklindernoren
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Medoids
    clf = PAM(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    p = Plot()
    p.plot_in_2d(X, y_pred, title="PAM Clustering")
    p.plot_in_2d(X, y, title="Actual Clustering")
File: k_means.py · Project: ML-From-Scratch · Author: eriklindernoren
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Means
    clf = KMeans(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    p = Plot()
    p.plot_in_2d(X, y_pred, title="K-Means Clustering")
    p.plot_in_2d(X, y, title="Actual Clustering")
File: test_cluster.py · Project: postlearn · Author: TomAugspurger
def data_labels():
    return make_blobs(random_state=2)
File: test_birch.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_n_samples_leaves_roots():
    # Sanity check for the number of samples in leaves and roots
    X, y = make_blobs(n_samples=10)
    brc = Birch()
    brc.fit(X)
    n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
    n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves()
                            for sc in leaf.subclusters_])
    assert_equal(n_samples_leaves, X.shape[0])
    assert_equal(n_samples_root, X.shape[0])
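
For context, a minimal standalone Birch run using the standard scikit-learn API (not taken from the test suite):

from sklearn.cluster import Birch
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
brc = Birch(n_clusters=3, threshold=0.5, branching_factor=50)
labels = brc.fit_predict(X)  # one label in {0, 1, 2} per sample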
File: test_birch.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_n_clusters():
    # Test that n_clusters param works properly
    X, y = make_blobs(n_samples=100, centers=10)
    brc1 = Birch(n_clusters=10)
    brc1.fit(X)
    assert_greater(len(brc1.subcluster_centers_), 10)
    assert_equal(len(np.unique(brc1.labels_)), 10)

    # Test that passing an AgglomerativeClustering instance as n_clusters
    # gives the same results.
    gc = AgglomerativeClustering(n_clusters=10)
    brc2 = Birch(n_clusters=gc)
    brc2.fit(X)
    assert_array_equal(brc1.subcluster_labels_, brc2.subcluster_labels_)
    assert_array_equal(brc1.labels_, brc2.labels_)

    # Test that the wrong global clustering step raises an Error.
    clf = ElasticNet()
    brc3 = Birch(n_clusters=clf)
    assert_raises(ValueError, brc3.fit, X)

    # Test that a small number of clusters raises a warning.
    brc4 = Birch(threshold=10000.)
    assert_warns(UserWarning, brc4.fit, X)
File: test_birch.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_sparse_X():
    # Test that sparse and dense data give same results
    X, y = make_blobs(n_samples=100, centers=10)
    brc = Birch(n_clusters=10)
    brc.fit(X)

    csr = sparse.csr_matrix(X)
    brc_sparse = Birch(n_clusters=10)
    brc_sparse.fit(csr)

    assert_array_equal(brc.labels_, brc_sparse.labels_)
    assert_array_almost_equal(brc.subcluster_centers_,
                              brc_sparse.subcluster_centers_)
File: test_birch.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_branching_factor():
    # Test that nodes have at most branching_factor subclusters
    X, y = make_blobs()
    branching_factor = 9

    # Purposefully set a low threshold to maximize the subclusters.
    brc = Birch(n_clusters=None, branching_factor=branching_factor,
                threshold=0.01)
    brc.fit(X)
    check_branching_factor(brc.root_, branching_factor)
    brc = Birch(n_clusters=3, branching_factor=branching_factor,
                threshold=0.01)
    brc.fit(X)
    check_branching_factor(brc.root_, branching_factor)

    # Raises error when branching_factor is set to one.
    brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01)
    assert_raises(ValueError, brc.fit, X)
File: test_birch.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_threshold():
    # Test that the leaf subclusters have a radius no greater than the threshold
    X, y = make_blobs(n_samples=80, centers=4)
    brc = Birch(threshold=0.5, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 0.5)

    brc = Birch(threshold=5.0, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 5.)
File: test_cluster.py · Project: FreeDiscovery · Author: FreeDiscovery
def test_birch_example_reproducibility(example_id):
    # check reproducibility of the Birch example
    rng = np.random.RandomState(42)

    X, y = make_blobs(n_samples=1000, n_features=10, random_state=rng)

    cluster_model = Birch(threshold=0.9, branching_factor=20,
                          compute_sample_indices=True)
    cluster_model.fit(X)
    #assert len(cluster_model.root_.subclusters_[1].child_.subclusters_) == 3

    htree, n_subclusters = birch_hierarchy_wrapper(cluster_model)

    assert htree.tree_size == n_subclusters

    # same random seed as in the birch hierarchy example
    assert htree.tree_size == 78
    sc = htree.flatten()[example_id]
    if example_id == 34:
        # this holds in both cases, but this example_id fails on Circle CI
        assert sc.current_depth == 1
        assert len(sc.children) == 3

    assert_array_equal([sc['cluster_id'] for sc in htree.flatten()],
                       np.arange(htree.tree_size))
File: utils.py · Project: simec · Author: cod3licious
def load_dataset(dataset, n_samples, random_state=1, n_features=3):
    # wrapper function to load one of the 3d datasets
    if dataset == 's_curve':
        return make_s_curve(n_samples, random_state=random_state)
    elif dataset == 'swiss_roll':
        return make_swiss_roll(n_samples, random_state=random_state)
    elif dataset == 'broken_swiss_roll':
        return make_broken_swiss_roll(n_samples, random_state=random_state)
    elif dataset == 'sphere':
        return make_sphere(n_samples, random_state=random_state)
    elif dataset == '3_circles':
        return make_3_circles(n_samples, random_state=random_state)
    elif dataset == 'peaks':
        return make_peaks(n_samples, random_state=random_state)
    elif dataset == 'blobs':
        return make_blobs(n_samples, n_features=n_features, centers=3, random_state=random_state)
    else:
        raise ValueError("unknown dataset: %s" % dataset)
File: GP_multi_classification.py · Project: Gaussian_process · Author: happyjin
def dataset_generator():
    """
    generate multi-class dataset
    :return: data X and its labels
    """
    plt.title("Three blobs", fontsize='small')
    X, y = make_blobs(n_features=2, centers=3)
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
    plt.show()
    #np.save('X_multi.npy', X)
    #np.save('y_multi.npy', y)
    return X, y
File: toy.py · Project: sdp_kmeans · Author: simonsfoundation
def gaussian_blobs(n_samples=200, return_centers=False):
    random_state = 0
    centers = [(-10, -10), (-10, 0), (0, -10)]
    centers.extend([(10, 10), (10, 0), (0, 10)])
    centers = np.array(centers)
    X, gt = sk_datasets.make_blobs(n_samples=n_samples, centers=centers,
                                   n_features=2, shuffle=False,
                                   random_state=random_state)
    if return_centers:
        return X, gt, centers
    else:
        return X, gt
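
A usage sketch (hypothetical, not from toy.py): with shuffle=False the samples are distributed evenly across the six centers and the labels come out grouped:

X, gt, centers = gaussian_blobs(n_samples=300, return_centers=True)
assert X.shape == (300, 2)
assert centers.shape == (6, 2)  # the two triangles of centers defined above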
File: gaussian_mixture.py · Project: MLAlgorithms · Author: rushter
def make_clusters(skew=True, *arg, **kwargs):
    X, y = datasets.make_blobs(*arg, **kwargs)
    if skew:
        # skew each blob with a random linear map (entries in [-0.5, 0.5))
        n_dim = X.shape[1]
        for i in np.unique(y):
            X[y == i] = X[y == i].dot(np.random.random((n_dim, n_dim)) - 0.5)
    return X, y
File: kmeans.py · Project: MLAlgorithms · Author: rushter
def kmeans_example(plot=False):
    X, y = make_blobs(centers=4, n_samples=500, n_features=2,
                      shuffle=True, random_state=42)
    clusters = len(np.unique(y))
    k = KMeans(K=clusters, max_iters=150, init='++')
    k.fit(X)
    k.predict()

    if plot:
        k.plot()
File: test_elbow.py · Project: yellowbrick · Author: DistrictDataLabs
def test_integrated_kmeans_elbow(self):
        """
        Test no exceptions for kmeans k-elbow visualizer on blobs dataset

        See #182: cannot use occupancy dataset because of memory usage
        """

        # Generate a blobs data set
        X, y = make_blobs(
            n_samples=1000, n_features=12, centers=6, shuffle=True
        )

        try:
            visualizer = KElbowVisualizer(KMeans(), k=4)
            visualizer.fit(X)
            visualizer.poof()
        except Exception as e:
            self.fail("error during k-elbow: {}".format(e))
File: test_elbow.py · Project: yellowbrick · Author: DistrictDataLabs
def test_integrated_mini_batch_kmeans_elbow(self):
        """
        Test no exceptions for mini-batch kmeans k-elbow visualizer

        See #182: cannot use occupancy dataset because of memory usage
        """

        # Generate a blobs data set
        X, y = make_blobs(
            n_samples=1000, n_features=12, centers=6, shuffle=True
        )

        try:
            visualizer = KElbowVisualizer(MiniBatchKMeans(), k=4)
            visualizer.fit(X)
            visualizer.poof()
        except Exception as e:
            self.fail("error during k-elbow: {}".format(e))
File: test_silhouette.py · Project: yellowbrick · Author: DistrictDataLabs
def test_integrated_kmeans_silhouette(self):
        """
        Test no exceptions for kmeans silhouette visualizer on blobs dataset

        See #182: cannot use occupancy dataset because of memory usage
        """

        # Generate a blobs data set
        X, y = make_blobs(
            n_samples=1000, n_features=12, centers=8, shuffle=True,
        )

        try:
            visualizer = SilhouetteVisualizer(KMeans())
            visualizer.fit(X)
            visualizer.poof()
        except Exception as e:
            self.fail("error during silhouette: {}".format(e))
File: test_silhouette.py · Project: yellowbrick · Author: DistrictDataLabs
def test_integrated_mini_batch_kmeans_silhouette(self):
        """
        Test no exceptions for mini-batch kmeans silhouette visualizer

        See #182: cannot use occupancy dataset because of memory usage
        """

        # Generate a blobs data set
        X, y = make_blobs(
            n_samples=1000, n_features=12, centers=8, shuffle=True,
        )

        try:
            visualizer = SilhouetteVisualizer(MiniBatchKMeans())
            visualizer.fit(X)
            visualizer.poof()
        except Exception as e:
            self.fail("error during silhouette: {}".format(e))
File: plot_lda.py · Project: Parallel-SGD · Author: angadgill
def generate_data(n_samples, n_features):
    """Generate random blob-ish data with noisy features.

    This returns an array of input data with shape `(n_samples, n_features)`
    and an array of `n_samples` target labels.

    Only one feature contains discriminative information; the other features
    contain only noise.
    """
    X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])

    # add non-discriminative features
    if n_features > 1:
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y
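
A hypothetical check of the docstring's claim (not from plot_lda.py), using scikit-learn's LDA; accuracy is driven almost entirely by the single discriminative feature:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = generate_data(n_samples=500, n_features=10)
lda = LinearDiscriminantAnalysis().fit(X, y)
print(lda.score(X, y))  # well above the 0.5 chance level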
File: bench_plot_approximate_neighbors.py · Project: Parallel-SGD · Author: angadgill
def make_data(n_samples, n_features, n_queries, random_state=0):
    """Create index and query data."""
    print('Generating random blob-ish data')
    X, _ = make_blobs(n_samples=n_samples + n_queries,
                      n_features=n_features, centers=100,
                      shuffle=True, random_state=random_state)

    # Keep the last samples as held out query vectors: note since we used
    # shuffle=True we have ensured that index and query vectors are
    # samples from the same distribution (a mixture of 100 gaussians in this
    # case)
    return X[:n_samples], X[n_samples:]
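
A hedged usage sketch (not from the benchmark script); the shapes follow directly from the slicing above:

index, queries = make_data(n_samples=10000, n_features=50, n_queries=100)
assert index.shape == (10000, 50)
assert queries.shape == (100, 50)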
File: test_kde.py · Project: Parallel-SGD · Author: angadgill
def test_kde_pipeline_gridsearch():
    # test that kde plays nice in pipelines and grid-searches
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    pipe1 = make_pipeline(StandardScaler(with_mean=False, with_std=False),
                          KernelDensity(kernel="gaussian"))
    params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10])
    search = GridSearchCV(pipe1, param_grid=params, cv=5)
    search.fit(X)
    assert_equal(search.best_params_['kerneldensity__bandwidth'], .1)
File: test_search.py · Project: Parallel-SGD · Author: angadgill
def test_grid_search_no_score():
    # Test grid-search on classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)
    grid_search = GridSearchCV(clf, {'C': Cs}, scoring='accuracy')
    grid_search.fit(X, y)

    grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs},
                                        scoring='accuracy')
    # smoketest grid search
    grid_search_no_score.fit(X, y)

    # check that best params are equal
    assert_equal(grid_search_no_score.best_params_, grid_search.best_params_)
    # check that we can call score and that it gives the correct result
    assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y))

    # giving no scoring function raises an error
    grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs})
    assert_raise_message(TypeError, "no scoring", grid_search_no_score.fit,
                         [[1]])
File: test_search.py · Project: Parallel-SGD · Author: angadgill
def test_grid_search_iid():
    # test the iid parameter
    # noise-free simple 2d-data
    X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
                      cluster_std=0.1, shuffle=False, n_samples=80)
    # split dataset into two folds that are not iid
    # first one contains data of all 4 blobs, second only from two.
    mask = np.ones(X.shape[0], dtype=bool)
    mask[np.where(y == 1)[0][::2]] = 0
    mask[np.where(y == 2)[0][::2]] = 0
    # this leads to perfect classification on one fold and a score of 1/3 on
    # the other
    svm = SVC(kernel='linear')
    # create "cv" for splits
    cv = [[mask, ~mask], [~mask, mask]]
    # once with iid=True (default)
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # for first split, 1/4 of dataset is in test, for second 3/4.
    # take weighted average
    assert_almost_equal(first.mean_validation_score,
                        1 * 1. / 4. + 1. / 3. * 3. / 4.)

    # once with iid=False
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
                               iid=False)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    # scores are the same as above
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # averaged score is just mean of scores
    assert_almost_equal(first.mean_validation_score,
                        np.mean(first.cv_validation_scores))
File: test_search.py · Project: Parallel-SGD · Author: angadgill
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42)
File: test_search.py · Project: Parallel-SGD · Author: angadgill
def test_grid_search_score_consistency():
    # test that correct scores are used
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    for score in ['f1', 'roc_auc']:
        grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score)
        grid_search.fit(X, y)
        cv = StratifiedKFold(n_splits=3)
        for C, scores in zip(Cs, grid_search.grid_scores_):
            clf.set_params(C=C)
            scores = scores[2]  # get the separate runs from grid scores
            i = 0
            for train, test in cv.split(X, y):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, scores[i])
                i += 1
File: test_grid_search.py · Project: Parallel-SGD · Author: angadgill
def test_grid_search_no_score():
    # Test grid-search on classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)
    grid_search = GridSearchCV(clf, {'C': Cs}, scoring='accuracy')
    grid_search.fit(X, y)

    grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs},
                                        scoring='accuracy')
    # smoketest grid search
    grid_search_no_score.fit(X, y)

    # check that best params are equal
    assert_equal(grid_search_no_score.best_params_, grid_search.best_params_)
    # check that we can call score and that it gives the correct result
    assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y))

    # giving no scoring function raises an error
    grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs})
    assert_raise_message(TypeError, "no scoring", grid_search_no_score.fit,
                         [[1]])
File: test_grid_search.py · Project: Parallel-SGD · Author: angadgill
def test_grid_search_iid():
    # test the iid parameter
    # noise-free simple 2d-data
    X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
                      cluster_std=0.1, shuffle=False, n_samples=80)
    # split dataset into two folds that are not iid
    # first one contains data of all 4 blobs, second only from two.
    mask = np.ones(X.shape[0], dtype=bool)
    mask[np.where(y == 1)[0][::2]] = 0
    mask[np.where(y == 2)[0][::2]] = 0
    # this leads to perfect classification on one fold and a score of 1/3 on
    # the other
    svm = SVC(kernel='linear')
    # create "cv" for splits
    cv = [[mask, ~mask], [~mask, mask]]
    # once with iid=True (default)
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # for first split, 1/4 of dataset is in test, for second 3/4.
    # take weighted average
    assert_almost_equal(first.mean_validation_score,
                        1 * 1. / 4. + 1. / 3. * 3. / 4.)

    # once with iid=False
    grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
                               iid=False)
    grid_search.fit(X, y)
    first = grid_search.grid_scores_[0]
    assert_equal(first.parameters['C'], 1)
    # scores are the same as above
    assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
    # averaged score is just mean of scores
    assert_almost_equal(first.mean_validation_score,
                        np.mean(first.cv_validation_scores))

