def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """
    Function to retrain certain models based on optimal alphas and/or ratios
    """
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha), max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(l1_ratio=self.__reratio__(ratio),
                                         alphas=self.__elasticnet_init["alpha"],
                                         max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
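The __realpha__ and __reratio__ helpers called above are not shown in this snippet. A minimal sketch of what they might look like, assuming they build a refined search grid around the previously found cross-validated optimum (the names, grid spacings, and clipping below are assumptions, not the original code):

# Hypothetical helpers assumed by __remodel__ above; they would live on
# the same class. Grids here are illustrative assumptions.
def __realpha__(self, alpha):
    # geometric grid spanning roughly a decade around the previous best alpha
    return [alpha * f for f in (0.1, 0.3, 0.5, 0.8, 1.0, 1.25, 2.0, 3.0, 10.0)]

def __reratio__(self, ratio):
    # linear grid around the previous best l1_ratio, clipped to (0, 1]
    return [min(max(ratio + d, 0.01), 1.0)
            for d in (-0.2, -0.1, -0.05, 0.0, 0.05, 0.1, 0.2)]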
def predicted_vs_actual_sale_price(self, x_train, y_train, title_name):
    # Split the training data into an extra train/test split
    x_train_split, x_test_split, y_train_split, y_test_split = train_test_split(x_train, y_train)
    print(np.shape(x_train_split), np.shape(x_test_split), np.shape(y_train_split), np.shape(y_test_split))
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
                            0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    # lasso = RidgeCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
    #                         0.3, 0.6, 1], cv=10)
    lasso.fit(x_train_split, y_train_split)
    y_predicted = lasso.predict(X=x_test_split)
    plt.figure(figsize=(10, 5))
    plt.scatter(y_test_split, y_predicted, s=20)
    rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
    plt.title(''.join([title_name, ', Predicted vs. Actual.', ' rmse = ', str(rmse_pred_vs_actual)]))
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    plt.plot([min(y_test_split), max(y_test_split)], [min(y_test_split), max(y_test_split)])
    plt.tight_layout()
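The rmse method used above is not defined in this snippet; a minimal sketch, assuming it is the usual root-mean-squared-error helper on the same class:

import numpy as np

# Assumed implementation of the rmse helper referenced above (a method of
# the same class as predicted_vs_actual_sale_price).
def rmse(self, y_pred, y_true):
    return np.sqrt(np.mean((np.asarray(y_pred) - np.asarray(y_true)) ** 2))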
def train_ridge_linear_model(_train_x, train_y, _predict_x,
                             sample_weight=None):
    print_title("Ridge Regressor")
    train_x, predict_x = standarize_feature(_train_x, _predict_x)

    # Use RidgeCV's default (efficient leave-one-out) cross-validation,
    # which is required when store_cv_values=True
    alphas = [0.1, 1, 10, 100, 1e3, 1e4, 2e4, 5e4, 8e4, 1e5, 1e6, 1e7, 1e8]
    reg = linear_model.RidgeCV(alphas=alphas, store_cv_values=True)
    # reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    cv_mse = np.mean(reg.cv_values_, axis=0)
    print("alphas: %s" % alphas)
    print("CV MSE: %s" % cv_mse)
    print("Best alpha using built-in RidgeCV: %f" % reg.alpha_)

    # Refit a plain Ridge model with the best alpha and generate predictions
    alpha = reg.alpha_
    reg = linear_model.Ridge(alpha=alpha)
    # reg.fit(train_x, train_y, sample_weight=sample_weight)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def test_get_errors_param(self):
    """
    Test known models we can get the cv errors for alpha selection
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
def test_clusterer_enforcement(self):
    """
    Assert that only clustering estimators can be passed to cluster viz
    """
    nomodels = [
        SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
    ]
    for nomodel in nomodels:
        with self.assertRaises(YellowbrickTypeError):
            visualizer = ClusteringScoreVisualizer(nomodel())

    models = [
        KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
    ]
    for model in models:
        try:
            visualizer = ClusteringScoreVisualizer(model())
        except YellowbrickTypeError:
            self.fail("could not pass clustering estimator to visualizer")
def build_signature_model(X, gidx, n_alphas=5):
    # Regress the full matrix onto the rows selected by gidx with a
    # cross-validated ridge model. Note: n_alphas is unused here; the
    # alpha grid is fixed.
    model = RidgeCV(alphas=(.1, 1, 10, 100, 1000, 10000, 100000), cv=5)
    model.fit(X[gidx].T, X.T)
    return model
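From the fit call, X's rows are features and its columns are samples, with the rows indexed by gidx serving as the predictive signature. A usage sketch under those assumed shapes (the data here is synthetic):

import numpy as np

# Usage sketch (shapes are assumptions inferred from the fit call above).
X = np.random.rand(50, 200)         # 50 features x 200 samples
gidx = np.arange(5)                 # first five rows form the signature
model = build_signature_model(X, gidx)
X_hat = model.predict(X[gidx].T).T  # reconstruction of all 50 rows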
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())

    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def test_store_cv_values(self):
    """
    Assert that store_cv_values is true on RidgeCV
    """
    model = AlphaSelection(RidgeCV())
    self.assertTrue(model.estimator.store_cv_values)

    model = AlphaSelection(RidgeCV(store_cv_values=True))
    self.assertTrue(model.estimator.store_cv_values)

    model = AlphaSelection(RidgeCV(store_cv_values=False))
    self.assertTrue(model.estimator.store_cv_values)
def test_get_alphas_param(self):
    """
    Assert that we can get the alphas from ridge, lasso, and elasticnet
    """
    alphas = np.logspace(-10, -2, 100)

    # Test original CV models
    for model in (RidgeCV, LassoCV, ElasticNetCV):
        try:
            model = AlphaSelection(model(alphas=alphas))
            malphas = model._find_alphas_param()
            self.assertTrue(np.array_equal(alphas, malphas))
        except YellowbrickValueError:
            self.fail("could not find alphas on {}".format(model.name))
def fit_thresholds(self, data, alpha, batch_size=128, verbose=0,
                   validation_data=None, cv=None, top_k=None):
    inputs = np.hstack([data[k] for k in self._graph_inputs])
    probs = self.predict(data, batch_size=batch_size)
    targets = {k: data[k] for k in self._graph_outputs}

    if isinstance(alpha, list):
        if validation_data is None and cv is None:
            warnings.warn("Neither validation data, nor the number of "
                          "cross-validation folds is provided. "
                          "The alpha parameter for threshold model will "
                          "be selected based on the default "
                          "cross-validation procedure in RidgeCV.")
        elif validation_data is not None:
            val_inputs = np.hstack([validation_data[k]
                                    for k in self._graph_inputs])
            val_probs = self.predict(validation_data)
            val_targets = {k: validation_data[k]
                           for k in self._graph_outputs}

    if verbose:
        sys.stdout.write("Constructing thresholds.")
        sys.stdout.flush()

    self.t_models = {}
    for k in self._graph_outputs:
        if verbose:
            sys.stdout.write(".")
            sys.stdout.flush()
        T = self._construct_thresholds(probs[k], targets[k])
        if isinstance(alpha, list):
            if validation_data is not None:
                # Select alpha by scoring plain Ridge fits on validation data
                val_T = self._construct_thresholds(val_probs[k],
                                                   val_targets[k],
                                                   top_k=top_k)
                score_best, alpha_best = -np.Inf, None
                for a in alpha:
                    model = lm.Ridge(alpha=a).fit(inputs, T)
                    score = model.score(val_inputs, val_T)
                    if score > score_best:
                        score_best, alpha_best = score, a
                alpha = alpha_best
            else:
                # Otherwise let RidgeCV pick alpha via cross-validation
                model = lm.RidgeCV(alphas=alpha, cv=cv).fit(inputs, T)
                alpha = model.alpha_
        self.t_models[k] = lm.Ridge(alpha=alpha)
        self.t_models[k].fit(inputs, T)

    if verbose:
        sys.stdout.write("Done.\n")
        sys.stdout.flush()
def ridge_regression():
    # Coarse search for the best alpha
    ridge = RidgeCV(alphas=[0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30, 60])
    ridge.fit(X_train, y_train)
    alpha = ridge.alpha_
    print("Best alpha:", alpha)

    # Refine the search with alphas centered around the coarse optimum
    print("Try again for more precision with alphas centered around " + str(alpha))
    ridge = RidgeCV(alphas=[alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8, alpha * .85,
                            alpha * .9, alpha * .95, alpha, alpha * 1.05, alpha * 1.1, alpha * 1.15,
                            alpha * 1.25, alpha * 1.3, alpha * 1.35, alpha * 1.4],
                    cv=10)
    ridge.fit(X_train, y_train)
    alpha = ridge.alpha_
    print("Best alpha:", alpha)

    print("Ridge RMSE on Training set:", rmse_cv(ridge, X_train, y_train).mean())
    print("Ridge RMSE on Test set:", rmse_cv(ridge, X_test, y_test).mean())
    y_train_rdg = ridge.predict(X_train)
    y_test_rdg = ridge.predict(X_test)

    # Plot residuals
    plt.scatter(y_train_rdg, y_train_rdg - y_train, c="blue", marker="s", label="Training data")
    plt.scatter(y_test_rdg, y_test_rdg - y_test, c="lightgreen", marker="s", label="Validation data")
    plt.title("Linear regression with Ridge regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Plot predictions
    plt.scatter(y_train_rdg, y_train, c="blue", marker="s", label="Training data")
    plt.scatter(y_test_rdg, y_test, c="lightgreen", marker="s", label="Validation data")
    plt.title("Linear regression with Ridge regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    # Plot important coefficients. Note: unlike lasso, ridge rarely drives
    # coefficients exactly to zero, so the "eliminated" count is usually 0.
    coefs = pd.Series(ridge.coef_, index=X_train.columns)
    print("Ridge picked " + str(sum(coefs != 0)) + " features and eliminated the other " +
          str(sum(coefs == 0)) + " features")
    imp_coefs = pd.concat([coefs.sort_values().head(10),
                           coefs.sort_values().tail(10)])
    imp_coefs.plot(kind="barh")
    plt.title("Coefficients in the Ridge Model")
    plt.show()
    return ridge
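The rmse_cv helper above is defined elsewhere in the original project; a common implementation consistent with how it is called (model, X, y, returning an array of per-fold RMSEs) would be:

import numpy as np
from sklearn.model_selection import cross_val_score

# Assumed rmse_cv implementation: cross-validated RMSE derived from
# scikit-learn's negated mean-squared-error scorer.
def rmse_cv(model, X, y):
    return np.sqrt(-cross_val_score(model, X, y,
                                    scoring="neg_mean_squared_error", cv=5))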
def online(X_org, y_org, test_x, test_uid):
    n_folds = 5
    verbose = True
    shuffle = False

    X = X_org
    y = y_org
    X_submission = test_x

    if shuffle:
        idx = np.random.permutation(y.size)
        X = X[idx]
        y = y[idx]

    # modern sklearn API; originally: skf = list(StratifiedKFold(y, n_folds))
    skf = list(StratifiedKFold(n_splits=n_folds).split(X, y))

    clfs = [
        RandomForestClassifier().set_params(**INITIAL_PARAMS.get("RFC:one", {})),
        ExtraTreesClassifier().set_params(**INITIAL_PARAMS.get("ETC:one", {})),
        GradientBoostingClassifier().set_params(**INITIAL_PARAMS.get("GBC:one", {})),
        LogisticRegression().set_params(**INITIAL_PARAMS.get("LR:one", {})),
        xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:two", {})),
        xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:one", {})),
    ]

    print("Creating train and test sets for blending.")
    dataset_blend_train = np.zeros((X.shape[0], len(clfs)))
    dataset_blend_test = np.zeros((X_submission.shape[0], len(clfs)))

    # Build out-of-fold predictions per base classifier for stacking
    for j, clf in enumerate(clfs):
        print(j, clf)
        dataset_blend_test_j = np.zeros((X_submission.shape[0], len(skf)))
        for i, (train, test) in enumerate(skf):
            print("Fold", i)
            X_train = X[train]
            y_train = y[train]
            X_test = X[test]
            y_test = y[test]
            clf.fit(X_train, y_train)
            y_submission = clf.predict_proba(X_test)[:, 1]
            dataset_blend_train[test, j] = y_submission
            dataset_blend_test_j[:, i] = clf.predict_proba(X_submission)[:, 1]
        dataset_blend_test[:, j] = dataset_blend_test_j.mean(1)

    print("Blending.")
    # clf = LogisticRegression(C=2, penalty='l2', class_weight='balanced', n_jobs=-1)
    # note: np.linspace(0, 200) includes alpha=0, which newer scikit-learn
    # versions reject; a strictly positive grid is safer.
    clf = linear_model.RidgeCV(alphas=np.linspace(0, 200), cv=LM_CV_NUM)
    # clf = GradientBoostingClassifier(learning_rate=0.02, subsample=0.5, max_depth=6, n_estimators=100)
    clf.fit(dataset_blend_train, y)
    # y_submission = clf.predict_proba(dataset_blend_test)[:, 1]
    print(clf.coef_, clf.intercept_)
    y_submission = clf.predict(dataset_blend_test)  # for RidgeCV

    print("Linear stretch of predictions to [0, 1]")
    y_submission = (y_submission - y_submission.min()) / (y_submission.max() - y_submission.min())
    print("blend result")
    save_submission(os.path.join(consts.SUBMISSION_PATH,
                                 MODEL_NAME + '_' + strftime("%m_%d_%H_%M_%S", localtime()) + '.csv'),
                    test_uid, y_submission)