def setUp(self):
os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
self.X_class, self.y_class = datasets.make_classification(random_state=42)
self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
self.reg_scorer = Scorer("mse", metrics.mean_squared_error)
self.classification_task_split = \
Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
    self.regression_task_split = \
        Task("reg_split", self.X_reg, self.y_reg, "regression", test_size=0.1, random_state=42)
    self.classification_task_cv = \
        Task("class_cv", self.X_class, self.y_class, "classification", cv=5, random_state=42)
self.regression_task_cv = \
Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)
# Python make_regression() usage examples
def test_cv():
"""Simple CV check."""
# XXX: don't use scikit-learn for tests.
X, y = make_regression()
cv = KFold(X.shape[0], 5)
glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
# check that it returns 5 scores
scores = cross_val_score(glm_normal, X, y, cv=cv)
assert_equal(len(scores), 5)
param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
{'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
10, base=np.exp(1))}]
glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
glmcv.fit(X, y)
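# A minimal, self-contained sketch of the same check using only scikit-learn
# (an assumption: a current sklearn release, where KFold takes n_splits rather
# than the legacy (n, n_folds) signature used above, and Ridge stands in for GLM).
def test_cv_sklearn_sketch():
    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import KFold, GridSearchCV, cross_val_score

    X, y = make_regression(random_state=0)
    cv = KFold(n_splits=5)
    # cross_val_score returns one score per fold
    scores = cross_val_score(Ridge(alpha=0.1), X, y, cv=cv)
    assert len(scores) == 5
    # grid search over the regularization strength, mirroring the param_grid above
    search = GridSearchCV(Ridge(), {"alpha": np.logspace(-2, 0, 5)}, cv=cv)
    search.fit(X, y)
    assert "alpha" in search.best_params_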
def test_min_samples_split():
X_c, y_c = load_digits(return_X_y=True)
X_r, y_r = make_regression(n_samples=10000, random_state=0)
for mss in [2, 4, 10, 20]:
mtr = MondrianTreeRegressor(random_state=0, min_samples_split=mss)
mtr.partial_fit(X_r[: X_r.shape[0] // 2], y_r[: X_r.shape[0] // 2])
mtr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
n_node_samples = mtr.tree_.n_node_samples[mtr.tree_.children_left != -1]
assert_greater(np.min(n_node_samples) + 1, mss)
mtc = MondrianTreeClassifier(random_state=0, min_samples_split=mss)
mtc.partial_fit(X_c[: X_c.shape[0] // 2], y_c[: X_c.shape[0] // 2])
mtc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
n_node_samples = mtc.tree_.n_node_samples[mtc.tree_.children_left != -1]
assert_greater(np.min(n_node_samples) + 1, mss)
def test_min_samples_split():
X_c, y_c = load_digits(return_X_y=True)
X_r, y_r = make_regression(n_samples=10000, random_state=0)
for mss in [2, 4, 10, 20]:
mfr = MondrianForestRegressor(random_state=0, min_samples_split=mss)
mfr.partial_fit(X_r[: X_r.shape[0] // 2], y_r[: X_r.shape[0] // 2])
mfr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
for est in mfr.estimators_:
n_node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
assert_greater(np.min(n_node_samples) + 1, mss)
mfc = MondrianForestClassifier(random_state=0, min_samples_split=mss)
mfc.partial_fit(X_c[: X_c.shape[0] // 2], y_c[: X_c.shape[0] // 2])
mfc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
for est in mfc.estimators_:
n_node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
assert_greater(np.min(n_node_samples) + 1, mss)
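# The invariant checked above (no internal node is split on fewer than
# min_samples_split samples) also holds for scikit-learn's batch-fitted trees;
# a self-contained sketch of the same assertion, assuming only scikit-learn:
def check_min_samples_split_sklearn_sketch():
    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.tree import DecisionTreeRegressor

    X, y = make_regression(n_samples=1000, random_state=0)
    for mss in [2, 4, 10, 20]:
        tree = DecisionTreeRegressor(random_state=0, min_samples_split=mss).fit(X, y)
        internal = tree.tree_.children_left != -1   # leaf nodes have children_left == -1
        n_node_samples = tree.tree_.n_node_samples[internal]
        assert np.min(n_node_samples) >= mss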
def regression():
# Generate a random regression problem
X, y = make_regression(n_samples=5000, n_features=25, n_informative=25,
n_targets=1, random_state=100, noise=0.05)
y *= 0.01
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
random_state=1111)
model = NeuralNet(
layers=[
Dense(64, Parameters(init='normal')),
Activation('linear'),
Dense(32, Parameters(init='normal')),
Activation('linear'),
Dense(1),
],
loss='mse',
optimizer=Adam(),
metric='mse',
batch_size=256,
max_epochs=15,
)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print("regression mse", mean_squared_error(y_test, predictions.flatten()))
def test_get_errors_param(self):
"""
Test known models we can get the cv errors for alpha selection
"""
# Test original CV models
for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
try:
model = AlphaSelection(model())
X, y = make_regression()
model.fit(X, y)
errors = model._find_errors_param()
self.assertTrue(len(errors) > 0)
except YellowbrickValueError:
self.fail("could not find errors on {}".format(model.name))
def test_cross_val_score_with_score_func_regression():
X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
random_state=0)
reg = Ridge()
# Default score of the Ridge regression estimator
scores = cross_val_score(reg, X, y, cv=5)
assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
# R2 score (aka. determination coefficient) - should be the
# same as the default estimator score
r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
# Mean squared error; this is a loss function, so "scores" are negative
mse_scores = cross_val_score(reg, X, y, cv=5, scoring="mean_squared_error")
expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
assert_array_almost_equal(mse_scores, expected_mse, 2)
# Explained variance
scoring = make_scorer(explained_variance_score)
ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
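# Note: the "mean_squared_error" scoring string above comes from an older scikit-learn;
# current releases spell it "neg_mean_squared_error", and the scores stay negative
# because greater must always mean better for a scorer. A small sketch:
def neg_mse_scoring_sketch():
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import cross_val_score

    X, y = make_regression(n_samples=30, n_features=20, n_informative=5, random_state=0)
    neg_mse = cross_val_score(Ridge(), X, y, cv=5, scoring="neg_mean_squared_error")
    mse = -neg_mse                  # flip the sign to recover the ordinary per-fold MSE
    assert (mse >= 0).all()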
def test_cross_val_score_with_score_func_regression():
X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
random_state=0)
reg = Ridge()
# Default score of the Ridge regression estimator
scores = cval.cross_val_score(reg, X, y, cv=5)
assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
# R2 score (aka. determination coefficient) - should be the
# same as the default estimator score
r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
# Mean squared error; this is a loss function, so "scores" are negative
mse_scores = cval.cross_val_score(reg, X, y, cv=5,
scoring="mean_squared_error")
expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
assert_array_almost_equal(mse_scores, expected_mse, 2)
# Explained variance
scoring = make_scorer(explained_variance_score)
ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=scoring)
assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
def test_multi_target_regression():
X, y = datasets.make_regression(n_targets=3)
X_train, y_train = X[:50], y[:50]
X_test, y_test = X[50:], y[50:]
references = np.zeros_like(y_test)
for n in range(3):
rgr = GradientBoostingRegressor(random_state=0)
rgr.fit(X_train, y_train[:, n])
references[:,n] = rgr.predict(X_test)
rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
rgr.fit(X_train, y_train)
y_pred = rgr.predict(X_test)
assert_almost_equal(references, y_pred)
def test_ridge_fit_intercept_sparse():
X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
bias=10., random_state=42)
X_csr = sp.csr_matrix(X)
dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
dense.fit(X, y)
sparse.fit(X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)
# test the solver switch and the corresponding warning
sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True)
assert_warns(UserWarning, sparse.fit, X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)
def test_make_regression():
X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3,
effective_rank=5, coef=True, bias=0.0,
noise=1.0, random_state=0)
assert_equal(X.shape, (100, 10), "X shape mismatch")
assert_equal(y.shape, (100,), "y shape mismatch")
assert_equal(c.shape, (10,), "coef shape mismatch")
assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features")
# Test that y ~= np.dot(X, c) + bias + N(0, 1.0).
assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)
# Test with small number of features.
X, y = make_regression(n_samples=100, n_features=1) # n_informative=3
assert_equal(X.shape, (100, 1))
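# The assertion above relies on make_regression's documented generative model,
# y = X @ coef + bias + noise * N(0, 1); a quick self-contained check of the same
# relationship (parameters below are illustrative):
def make_regression_generative_model_sketch():
    import numpy as np
    from sklearn.datasets import make_regression

    X, y, coef = make_regression(n_samples=10000, n_features=5, n_informative=3,
                                 bias=2.0, noise=1.0, coef=True, random_state=0)
    residual = y - (X @ coef + 2.0)
    assert abs(residual.std() - 1.0) < 0.05    # residual spread matches the noise level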
def lesson_5():
# db = datasets.load_boston()
# print db.data.shape
# data_X=db.data
# data_y=db.target
# model = LinearRegression()
# model.fit(data_X,data_y)
# print model.predict(data_X[:8])
# print data_y[:8]
    X, y = datasets.make_regression(n_samples=100, n_features=1, n_targets=1, noise=10)
    plt.scatter(X, y)
plt.show()
# Source: figure.classification.vs.regression.py (project: microbiome-summer-school-2017, author: aldro61)
def make_regression_example(axis, random_state):
X, y = make_regression(n_samples=100, n_features=1, noise=30.0, random_state=random_state)
axis.scatter(X[:, 0], y, color="blue", s=10, label="Patients")
clf = LinearSVR().fit(X, y)
axis.plot(X[:, 0], clf.predict(X), color="black", label="Model")
    axis.tick_params(labelbottom=False, labelleft=False)
    axis.set_xlabel("Gene 1")
    axis.set_ylabel("Survived (years)")
    axis.legend()
def main():
X, y = make_regression(n_samples=100, n_features=1, noise=20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
n_samples, n_features = np.shape(X)
model = LinearRegression(n_iterations=100)
model.fit(X_train, y_train)
# Training error plot
n = len(model.training_errors)
training, = plt.plot(range(n), model.training_errors, label="Training Error")
plt.legend(handles=[training])
plt.title("Error Plot")
plt.ylabel('Mean Squared Error')
plt.xlabel('Iterations')
plt.show()
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print ("Mean squared error: %s" % (mse))
y_pred_line = model.predict(X)
# Color map
cmap = plt.get_cmap('viridis')
# Plot the results
m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.suptitle("Linear Regression")
plt.title("MSE: %.2f" % mse, fontsize=10)
plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
plt.show()
def test_tau():
"""
Test time of split for the root.
"""
X, y = make_regression(random_state=0, n_features=10)
y = np.round(y)
rate = np.sum(np.max(X, axis=0) - np.min(X, axis=0))
for est in estimators:
est = est.set_params(max_depth=1)
taus = []
for random_state in np.arange(100):
est.set_params(random_state=random_state).fit(X, y)
taus.append(est.tree_.tau[0])
assert_almost_equal(np.mean(taus), 1.0 / rate, 2)
def test_mondrian_tree_n_node_samples():
for r in range(1000):
X, y = make_regression(n_samples=2, random_state=r)
mtr = MondrianTreeRegressor(random_state=0)
mtr.partial_fit(X, y)
assert_array_equal(mtr.tree_.n_node_samples, [1, 1, 2])
def test_partial_fit_equivalence():
X, y = make_regression(random_state=0, n_samples=100)
mtr = MondrianTreeRegressor(random_state=0)
mtr.partial_fit(X, y)
for batch_size in [10, 20, 25, 50, 90]:
check_partial_fit_equivalence(batch_size, mtr, 0, X, y)
X, y = make_classification(random_state=0, n_samples=100)
mtc = MondrianTreeClassifier(random_state=0)
mtc.partial_fit(X, y)
for batch_size in [10, 20, 25, 50, 90]:
check_partial_fit_equivalence(batch_size, mtc, 0, X, y, is_clf=True)
def test_partial_fit_n_samples_1000():
mtc = MondrianTreeClassifier(random_state=0)
X, y = load_digits(return_X_y=True)
check_online_fit(mtc, X, y, 20)
mtc = MondrianTreeClassifier(random_state=0)
check_online_fit(mtc, X, y, 100)
X, y = make_regression(random_state=0, n_samples=10000)
mtr = MondrianTreeRegressor(random_state=0)
check_online_fit(mtr, X, y, 100, is_clf=False)
mtr = MondrianTreeRegressor(random_state=0)
check_online_fit(mtr, X, y, 20, is_clf=False)
def test_multioutput_regression():
"""Test whether multi-output regression works as expected."""
X, y = make_regression(n_samples=200, n_targets=5,
random_state=random_state)
for activation in ACTIVATION_TYPES:
elm = ELMRegressor(n_hidden=300, activation=activation,
random_state=random_state)
elm.fit(X, y)
assert_greater(elm.score(X, y), 0.95)
def test_known_values(self):
from sklearn.datasets import make_regression
X,y, coef = make_regression(200, 15, 15, coef=True)
np.testing.assert_equal(relevant_features(X, y),
coef != 0.0)
def regression():
# Generate a random regression problem
X, y = make_regression(n_samples=500, n_features=5,
n_informative=5, n_targets=1,
noise=0.05, random_state=1111, bias=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
random_state=1111)
model = knn.KNNRegressor(k=5, distance_func=distance.euclidean)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('regression mse', mean_squared_error(y_test, predictions))
def regression():
# Generate a random regression problem
X, y = make_regression(n_samples=500, n_features=5, n_informative=5,
n_targets=1, noise=0.05, random_state=1111,
bias=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
random_state=1111)
model = GradientBoostingRegressor(n_estimators=25, max_depth=5,
max_features=3, )
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('regression, mse: %s'
% mean_squared_error(y_test.flatten(), predictions.flatten()))
def regression():
# Generate a random regression problem
X, y = make_regression(n_samples=10000, n_features=100,
n_informative=75, n_targets=1, noise=0.05,
random_state=1111, bias=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
random_state=1111)
model = LinearRegression(lr=0.01, max_iters=2000, penalty='l2', C=0.03)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('regression mse', mean_squared_error(y_test, predictions))
def regression():
# Generate a random regression problem
X, y = make_regression(n_samples=500, n_features=5, n_informative=5,
n_targets=1, noise=0.05, random_state=1111,
bias=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
random_state=1111)
model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3, )
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('regression, mse: %s'
% mean_squared_error(y_test.flatten(), predictions.flatten()))
def test_get_alphas_param_lassolars(self):
"""
Assert that we can get alphas from lasso lars.
"""
X, y = make_regression()
model = AlphaSelection(LassoLarsCV())
model.fit(X, y)
try:
malphas = model._find_alphas_param()
self.assertTrue(len(malphas) > 0)
except YellowbrickValueError:
self.fail("could not find alphas on {}".format(model.name))
def create_regression_dataset(n_samples, n_features, n_informative, effective_rank, tail_strength,
noise, random_state=None):
"""
Creates a regression dataset
:param n_samples: number of observations
:param n_features: number of features
:param n_informative: number of informative features
:param n_targets: The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar.
:param effective_rank: approximate number of singular vectors required to explain data
:param tail_strength: relative importance of the fat noisy tail of the singular values profile
:param noise: standard deviation of the gaussian noise applied to the output
:param random_state: the numpy RandomState
:return: the requested dataframe
"""
random_state = get_random_state(random_state)
X, y = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_informative,
n_targets=1, effective_rank=effective_rank, tail_strength=tail_strength,
noise=noise, random_state=random_state)
# cast to a data frame
df = pd.DataFrame(X)
# rename X columns
df = rename_columns(df)
# and add the Y
df['y'] = y
return df
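# get_random_state and rename_columns are project helpers; a self-contained sketch
# of the same idea using only pandas and scikit-learn (column names are illustrative):
def create_regression_dataset_sketch():
    import pandas as pd
    from sklearn.datasets import make_regression

    X, y = make_regression(n_samples=500, n_features=8, n_informative=4,
                           effective_rank=3, tail_strength=0.5,
                           noise=0.1, random_state=42)
    df = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[1])])
    df["y"] = y
    return df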
def test_Symbolic_fit(n_out):
x, y = make_regression(n_features=2, n_informative=1, n_targets=n_out)
est = Symbolic(max_nfev=1, lambda_=1).fit(x, y)
yhat = est.predict(x)
assert yhat.shape == y.shape
def test_Symbolic_joblib():
x, y = make_regression(n_features=2, n_informative=1, n_targets=1)
yhat = Symbolic(n_jobs=-1, max_nfev=1, lambda_=1).fit(x, y).predict(x)
assert yhat.shape == y.shape
def data(self):
X, y = make_regression(
1000, 20, n_informative=10, bias=0, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32).reshape(-1, 1)
Xt = StandardScaler().fit_transform(X)
yt = StandardScaler().fit_transform(y)
return Xt, yt
def test_multi_target_regression_one_target():
# Test multi target regression raises
X, y = datasets.make_regression(n_targets=1)
X_train, y_train = X[:50], y[:50]
X_test, y_test = X[50:], y[50:]
rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
assert_raises(ValueError, rgr.fit, X_train, y_train)