def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """
    Function to retrain certain models based on optimal alphas and/or ratios
    """
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha), max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(l1_ratio=self.__reratio__(ratio),
                                         alphas=self.__elasticnet_init["alpha"],
                                         max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
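The helpers `__realpha__` and `__reratio__` (and the `__elasticnet_init` dict) are defined elsewhere in the class and not shown here. A minimal sketch of what they plausibly do, assuming each builds a refined search grid around the previous best value; the bodies below are inferred, not taken from the source:

import numpy as np

def __realpha__(self, alpha):
    # Inferred helper: a denser alpha grid centered on the previous best alpha.
    return np.linspace(alpha * 0.5, alpha * 1.5, 21)

def __reratio__(self, ratio):
    # Inferred helper: a refined l1_ratio grid, clipped to the valid (0, 1] range.
    return np.clip(np.linspace(ratio - 0.1, ratio + 0.1, 11), 0.01, 1.0)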
Python class LassoCV() example source code
def run_lasso(X, y, max_iter=3000, cv=5, n_threads=1):
    """Run sklearn's LassoCV to select the regularization strength.
    Args:
        X (np.array): scaled feature matrix.
        y (pd.DataFrame): four-column response table.
        max_iter (int): maximum number of iterations.
        cv (int): number of CV folds.
        n_threads (int): number of threads to use for parallel computing.
    Returns:
        float: trained alpha value.
    """
    logger.info('Running LassoCV with {} iter. and {}-fold CV'.format(max_iter, cv))
    # generate logit-transformed response
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    # sub-sample X and y to 300,000 rows
    use_ix = np.random.choice(y_logit.shape[0], 300000, replace=False)
    Xsub = X[use_ix, :]
    ysub = y_logit[use_ix]
    reg = LassoCV(max_iter=max_iter, cv=cv, copy_X=False, n_jobs=n_threads)
    lassocv = reg.fit(Xsub, ysub)
    logger.info('LassoCV alpha = {}'.format(lassocv.alpha_))
    return lassocv.alpha_
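A hypothetical driver for run_lasso; the module-level logger and the imports are assumed from the original file, and the synthetic table mirrors the shapes the docstring describes (the fourth column is a placeholder, since the source never names it):

import logging
import numpy as np
import pandas as pd
from scipy.special import logit
from sklearn.linear_model import LassoCV

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# 400k rows so the hard-coded 300,000-row sub-sample is valid.
n, p = 400000, 20
X = np.random.randn(n, p)
y = pd.DataFrame({'nMut': np.random.poisson(3, n),
                  'length': np.random.randint(100, 1000, n),
                  'N': 100,
                  'extra': 0})  # placeholder fourth column
best_alpha = run_lasso(X, y, max_iter=1000, cv=5, n_threads=2)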
def predicted_vs_actual_sale_price(self, x_train, y_train, title_name):
    # Split the training data into an additional train/test split
    x_train_split, x_test_split, y_train_split, y_test_split = train_test_split(x_train, y_train)
    print(np.shape(x_train_split), np.shape(x_test_split), np.shape(y_train_split), np.shape(y_test_split))
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
                            0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    # lasso = RidgeCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
    #                         0.3, 0.6, 1], cv=10)
    lasso.fit(x_train_split, y_train_split)
    y_predicted = lasso.predict(x_test_split)
    plt.figure(figsize=(10, 5))
    plt.scatter(y_test_split, y_predicted, s=20)
    rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
    plt.title(''.join([title_name, ', Predicted vs. Actual.', ' rmse = ', str(rmse_pred_vs_actual)]))
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    # reference line where predicted == actual
    plt.plot([min(y_test_split), max(y_test_split)], [min(y_test_split), max(y_test_split)])
    plt.tight_layout()
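The method relies on a self.rmse helper that is not shown. A minimal sketch, assuming it is the usual root-mean-squared error (signature matched to the call above, body inferred):

import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(self, y_predicted, y_actual):
    # Inferred helper: plain RMSE between predicted and actual sale prices.
    return np.sqrt(mean_squared_error(y_actual, y_predicted))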
def get_logistic_regression_coefs_l1(self, category,
                                     clf=LassoCV(alphas=[0.1, 0.001],
                                                 max_iter=10000,
                                                 n_jobs=-1)):
    '''Computes an l1-penalized regression score for a category
    (logistic regression approximated with LassoCV on a continuous
    encoding of the boolean labels).
    Parameters
    ----------
    category : str
        category name to score
    Returns
    -------
    (coefficient array, accuracy, majority class baseline accuracy)
    '''
    # sklearn.cross_validation was removed in scikit-learn 0.20; use model_selection.
    from sklearn.model_selection import cross_val_predict
    y = self._get_mask_from_category(category)
    y_continuous = self._get_continuous_version_boolean_y(y)
    # X = TfidfTransformer().fit_transform(self._X)
    X = self._X
    y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
    acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
    # Refit on the full data to obtain the final coefficient array.
    clf.fit(X, y_continuous)
    return clf.coef_, acc, baseline
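A hypothetical call site, assuming self is a scattertext TermDocMatrix and that 'democrat' is one of its categories (both the variable name and the category are illustrative, not taken from the source):

# term_doc_matrix: a previously built scattertext TermDocMatrix (assumed).
coefs, acc, baseline = term_doc_matrix.get_logistic_regression_coefs_l1('democrat')
print('accuracy {:.3f} vs. majority-class baseline {:.3f}'.format(acc, baseline))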
def test_get_errors_param(self):
    """
    Test known models we can get the cv errors for alpha selection
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
def lasso_train(X, y):
    model_lasso = LassoCV(alphas=[1, 0.1, 0.001, 0.0005]).fit(X, y)
    print('lasso mean cv rmse is', rmse_cv(model_lasso, X, y).mean())
    return model_lasso
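rmse_cv is defined elsewhere in the original kernel; a minimal sketch, assuming the conventional cross-validated RMSE helper (body inferred):

import numpy as np
from sklearn.model_selection import cross_val_score

def rmse_cv(model, X, y, cv=5):
    # neg_mean_squared_error is negated MSE, so flip the sign before the sqrt.
    mse = -cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=cv)
    return np.sqrt(mse)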
#%%
def train_lasso_model(_train_x, train_y, _predict_x):
    print_title("Lasso Regressor")
    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)
    reg = linear_model.LassoCV(
        precompute=True, cv=5, verbose=1, n_jobs=4)
    reg.fit(train_x, train_y)
    print("alphas: %s" % reg.alphas_)
    print("mse path: %s" % np.mean(reg.mse_path_, axis=1))
    # locate the index of the winning alpha in the alpha grid
    itemindex = np.where(reg.alphas_ == reg.alpha_)
    print("itemindex: %s" % itemindex)
    _mse = np.mean(reg.mse_path_[itemindex[0], :])
    print("Best alpha using built-in LassoCV: %f (mse: %f)" %
          (reg.alpha_, _mse))
    alpha = reg.alpha_
    # refit a plain Lasso with the selected alpha
    reg = linear_model.Lasso(alpha=alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zero coefs: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def test_real_model(self):
    """
    Test that model name works for sklearn estimators
    """
    model1 = LassoCV()
    model2 = LSHForest()  # LSHForest was removed in scikit-learn 0.21; this test dates from an older release
    model3 = KMeans()
    model4 = RandomForestClassifier()
    self.assertEqual(get_model_name(model1), 'LassoCV')
    self.assertEqual(get_model_name(model2), 'LSHForest')
    self.assertEqual(get_model_name(model3), 'KMeans')
    self.assertEqual(get_model_name(model4), 'RandomForestClassifier')
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def test_get_alphas_param(self):
    """
    Assert that we can get the alphas from ridge, lasso, and elasticnet
    """
    alphas = np.logspace(-10, -2, 100)
    # Test original CV models
    for model in (RidgeCV, LassoCV, ElasticNetCV):
        try:
            model = AlphaSelection(model(alphas=alphas))
            malphas = model._find_alphas_param()
            self.assertTrue(np.array_equal(alphas, malphas))
        except YellowbrickValueError:
            self.fail("could not find alphas on {}".format(model.name))
def Lasso_regression():
    lasso = LassoCV(alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1,
                            0.3, 0.6, 1],
                    max_iter=50000, cv=10)
    lasso.fit(train_split, y)
    alpha = lasso.alpha_
    print("Best alpha:", alpha)
    print("Trying again for more precision with alphas centered around " + str(alpha))
    lasso = LassoCV(alphas=[alpha * .6, alpha * .65, alpha * .7, alpha * .75, alpha * .8,
                            alpha * .85, alpha * .9, alpha * .95, alpha, alpha * 1.05,
                            alpha * 1.1, alpha * 1.15, alpha * 1.25, alpha * 1.3, alpha * 1.35,
                            alpha * 1.4],
                    max_iter=50000, cv=10)
    lasso.fit(train_split, y)
    alpha = lasso.alpha_
    print("Best alpha:", alpha)
    print("Lasso RMSE on training set:", rmse_cv(lasso, train_split, y).mean())
    y_train_las = lasso.predict(train_split)
    # Plot residuals
    plt.scatter(y_train_las, y_train_las - y, c="blue", marker="s", label="Training data")
    plt.title("Linear regression with Lasso regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()
    # Plot predictions vs. actual values
    plt.scatter(y_train_las, y, c="blue", marker="s", label="Training data")
    plt.title("Linear regression with Lasso regularization")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()
    # Plot important coefficients
    coefs = pd.DataFrame(lasso.coef_, index=X_train.columns, columns=['value'])
    # print("Lasso picked " + str((coefs['value'] != 0).sum()) + " features and eliminated the other " +
    #       str((coefs['value'] == 0).sum()) + " features")
    # imp_coefs = pd.concat([coefs['value'].sort_values().head(10),
    #                        coefs['value'].sort_values().tail(10)])
    # imp_coefs.plot(kind="barh")
    # plt.title("Coefficients in the Lasso Model")
    # plt.show()
    return coefs, lasso
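A hypothetical driver; train_split, y, X_train, rmse_cv, plt, and pd are module-level names the function closes over in the original script (see the rmse_cv sketch earlier):

# Assumes train_split/X_train come from prior feature engineering and
# y is the (typically log-transformed) sale price.
coefs, lasso_model = Lasso_regression()
n_kept = int((coefs["value"] != 0).sum())
print("Lasso kept {} of {} features".format(n_kept, coefs.shape[0]))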