Python make_regression() — example source code
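All of the snippets below exercise scikit-learn's make_regression. For orientation, here is a minimal usage sketch (assuming sklearn.datasets.make_regression; the parameter values are illustrative):

from sklearn.datasets import make_regression

# 100 samples, 5 features, of which 2 actually drive the target;
# Gaussian noise with standard deviation 0.1 is added to y.
X, y = make_regression(n_samples=100, n_features=5, n_informative=2,
                       noise=0.1, random_state=0)
print(X.shape, y.shape)  # (100, 5) (100,)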

test_huber.py · project: Parallel-SGD · author: angadgill
import numpy as np
from sklearn.datasets import make_regression


def make_regression_with_outliers(n_samples=50, n_features=20):
    rng = np.random.RandomState(0)
    # Generate clean regression data; outliers are injected below.
    X, y = make_regression(
        n_samples=n_samples, n_features=n_features,
        random_state=0, noise=0.05)

    # Replace 10% of the samples with noise.
    num_noise = int(0.1 * n_samples)
    random_samples = rng.randint(0, n_samples, num_noise)
    X[random_samples, :] = 2.0 * rng.normal(0, 1, (num_noise, X.shape[1]))
    return X, y
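Since this helper comes from a Huber regression test, the natural consumer is a robust estimator. A hedged sketch of how the data would typically be used (the estimator choice is illustrative, assuming sklearn.linear_model.HuberRegressor, and is not part of the original test):

# Illustrative only: compare a robust fit and an ordinary least-squares fit
# on the outlier-contaminated data generated above.
from sklearn.linear_model import HuberRegressor, LinearRegression

X, y = make_regression_with_outliers()
huber = HuberRegressor().fit(X, y)  # down-weights the injected noise rows
ols = LinearRegression().fit(X, y)  # pulled toward the outliers
print(huber.score(X, y), ols.score(X, y))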
test_samples_generator.py · project: Parallel-SGD · author: angadgill
import numpy as np
from numpy.testing import (assert_equal, assert_array_equal,
                           assert_almost_equal)
from sklearn.datasets import make_regression


def test_make_regression_multitarget():
    X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3,
                              n_targets=3, coef=True, noise=1., random_state=0)

    assert_equal(X.shape, (100, 10), "X shape mismatch")
    assert_equal(y.shape, (100, 3), "y shape mismatch")
    assert_equal(c.shape, (10, 3), "coef shape mismatch")
    assert_array_equal(sum(c != 0.0), 3,
                       "Unexpected number of informative features")

    # Test that y ~= np.dot(X, c) + bias + N(0, 1.0)
    assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)
test_gaussian_process.py · project: Parallel-SGD · author: angadgill
from sklearn.datasets import make_regression
# The two imports below are legacy scikit-learn APIs: GaussianProcess and
# assert_greater were removed in later releases.
from sklearn.gaussian_process import GaussianProcess
from sklearn.utils.testing import assert_greater


def test_mse_solving():
    # test the MSE estimate to be sane.
    # non-regression test for ignoring off-diagonals of feature covariance,
    # testing with nugget that renders covariance useless, only
    # using the mean function, with low effective rank of data
    gp = GaussianProcess(corr='absolute_exponential', theta0=1e-4,
                         thetaL=1e-12, thetaU=1e-2, nugget=1e-2,
                         optimizer='Welch', regr="linear", random_state=0)

    X, y = make_regression(n_informative=3, n_features=60, noise=50,
                           random_state=0, effective_rank=1)

    gp.fit(X, y)
    assert_greater(1000, gp.predict(X, eval_MSE=True)[1].mean())
test_model_selection_insight.py · project: karura · author: chakki-works
def test_insight_regression(self):
    candidates = 4
    X, y = make_regression(
        n_samples=1000, n_features=15, n_informative=candidates,
        n_targets=1)

    df = pd.DataFrame(np.hstack((X, y.reshape([-1, 1]))),
                      columns=["c_{}".format(i) for i in range(X.shape[1])] + ["target"])
    dfe = DataFrameExtension(df, numericals=["target"], target="target")

    insight = ModelSelectionInsight()
    insight.adopt(dfe)

    self.assertTrue(insight.score > 0)
    print(insight.score)
test_feature_selection_insight.py · project: karura · author: chakki-works
def test_insight_regression(self):
    candidates = 4
    X, y = make_regression(
        n_samples=1000, n_features=15, n_informative=candidates,
        n_targets=1)

    df = pd.DataFrame(np.hstack((X, y.reshape([-1, 1]))),
                      columns=["c_{}".format(i) for i in range(X.shape[1])] + ["target"])
    dfe = DataFrameExtension(df, numericals=["target"], target="target")

    insight = FeatureSelectionInsight()
    insight.adopt(dfe)

    print("selected regressor features {}".format(dfe.ftypes.keys()))
    self.assertTrue(candidates <= len(dfe.ftypes) - 1 < candidates * 2)  # -1 is the target ftype
genData.py · project: fri · author: lpfann
def genRegressionData(n_samples: int = 100, n_features: int = 2, n_redundant: int = 0, strRel: int = 1,
                      n_repeated: int = 0, noise: float = 1, random_state: object = None,
                      partition=None) -> object:
    """Generate synthetic regression data

    Parameters
    ----------
    n_samples : int, optional
        Number of samples
    n_features : int, optional
        Number of features
    n_redundant : int, optional
        Number of features which are part of redundant subsets (weakly relevant)
    strRel : int, optional
        Number of features which are mandatory for the underlying model (strongly relevant)
    n_repeated : int, optional
        Number of features which are clones of existing ones. 
    noise : float, optional
        Noise of the created samples around ground truth.
    random_state : object, optional
        Random state object used for generation.
    partition : list of int, optional
        Sizes of the weakly relevant (redundant) feature subsets. Defaults to
        subsets of size 2 covering the n_redundant features.

    Returns
    -------
    X : array of shape [n_samples, n_features]
        The generated samples.
    y : array of shape [n_samples]
        The output values (target).

    Raises
    ------
    ValueError
        Wrong parameters for the specified amount of features/samples.
    """

    _checkParam(**locals())
    random_state = check_random_state(random_state)

    X = np.zeros((int(n_samples), int(n_features)))

    # Find partitions which define the weakly relevant subsets
    if partition is None:
        # Legacy behaviour yielding subsets of size 2
        partition = int(n_redundant / 2) * [2]
    part_size = len(partition)

    X_informative, Y = make_regression(n_features=int(strRel + part_size),
                                       n_samples=int(n_samples),
                                       noise=noise,
                                       n_informative=int(strRel),
                                       random_state=random_state,
                                       shuffle=False)

    X = _fillVariableSpace(**locals())

    return X, Y
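A hedged usage sketch for the generator above (it depends on _checkParam and _fillVariableSpace from the same fri module, so this assumes the full module is importable):

# Usage sketch, assuming fri's genData module (with _checkParam and
# _fillVariableSpace) is available in scope.
X, y = genRegressionData(n_samples=200, n_features=10, n_redundant=4,
                         strRel=2, noise=0.5, random_state=0)
print(X.shape, y.shape)  # expected: (200, 10) (200,)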
test_weight_boosting.py · project: Parallel-SGD · author: angadgill
from numpy.testing import assert_array_equal
from scipy.sparse import (coo_matrix, csc_matrix, csr_matrix, dok_matrix,
                          lil_matrix)
from sklearn import datasets
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR


def test_sparse_regression():
    # Check regression with sparse input.

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set."""

        def fit(self, X, y, sample_weight=None):
            """Modified fit that records the training data type for later verification."""
            super(CustomSVR, self).fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self

    X, y = datasets.make_regression(n_samples=15, n_features=50, n_targets=1,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
                          dok_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        # Trained on sparse format
        sparse_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train_sparse, y_train)

        # Trained on dense format
        dense_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train, y_train)

        # predict
        sparse_results = sparse_classifier.predict(X_test_sparse)
        dense_results = dense_classifier.predict(X_test)
        assert_array_equal(sparse_results, dense_results)

        # staged_predict
        sparse_results = sparse_classifier.staged_predict(X_test_sparse)
        dense_results = dense_classifier.staged_predict(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        types = [i.data_type_ for i in sparse_classifier.estimators_]

        assert all([(t == csc_matrix or t == csr_matrix)
                   for t in types])

