# These snippets assume: import numpy as np; from sklearn import linear_model
# (print_title is a project-local helper).
def train_lassolars_model(train_x, train_y, predict_x):
    print_title("LassoLars Regressor")
    reg = linear_model.LassoLarsCV(
        cv=10, n_jobs=3, max_iter=2000, normalize=False)
    reg.fit(train_x, train_y)
    print("alphas and cv_alphas: {0} and {1}".format(
        reg.alphas_.shape, reg.cv_alphas_.shape))
    print("alphas[%d]: %s" % (len(reg.cv_alphas_), reg.cv_alphas_))
    print("mse shape: {0}".format(reg.cv_mse_path_.shape))
    # Look up the cross-validation MSE at the selected alpha.
    index = np.where(reg.cv_alphas_ == reg.alpha_)[0]
    _mse_v = np.mean(reg.cv_mse_path_[index, :])
    print("mse value: %f" % _mse_v)
    print("best alpha: %f" % reg.alpha_)
    best_alpha = reg.alpha_
    # Refit a plain LassoLars with the cross-validated alpha.
    reg = linear_model.LassoLars(alpha=best_alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zero coefs: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    return {'y': predict_y, 'coef': reg.coef_}
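A minimal smoke test for the function above (a hedged sketch: the synthetic data and the stub for the project's print_title helper are assumptions, and the snippet targets older scikit-learn releases where LassoLarsCV still accepted normalize= and exposed cv_alphas_/cv_mse_path_):

import numpy as np
from sklearn import linear_model

def print_title(title):
    # Stand-in for the project's own helper (assumed, not from the source).
    print('=== %s ===' % title)

rng = np.random.RandomState(0)
train_x = rng.randn(200, 20)
true_coef = np.zeros(20)
true_coef[:3] = [1.5, -2.0, 3.0]          # sparse ground truth
train_y = train_x.dot(true_coef) + 0.1 * rng.randn(200)
predict_x = rng.randn(50, 20)

result = train_lassolars_model(train_x, train_y, predict_x)
print((result['coef'] != 0).sum())        # should recover roughly 3 features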
def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with rank < n_features) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in ([[5, 0],
               [0, 5],
               [10, 10]],
              [[10, 10, 0],
               [1e-32, 0, 0],
               [0, 0, 1]],
              ):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. / (2. * 3.)
                    * linalg.norm(y - np.dot(X, coef_lars_)) ** 2
                    + .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
                  + .1 * linalg.norm(coef_cd_, 1))
        # LARS should reach an objective no worse than coordinate descent.
        assert_less(obj_lars, obj_cd * (1. + 1e-8))
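The quantity computed inline above is the standard Lasso objective, (1 / (2 * n_samples)) * ||y - Xw||^2 + alpha * ||w||_1; a small helper making that explicit (a sketch, not part of the original test):

import numpy as np
from scipy import linalg

def lasso_objective(X, y, coef, alpha):
    # The objective both estimators minimize; lower is better.
    n_samples = np.asarray(X).shape[0]
    residual = np.asarray(y) - np.dot(X, coef)
    return (linalg.norm(residual) ** 2 / (2. * n_samples)
            + alpha * linalg.norm(coef, 1))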
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)
    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
                                      tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)
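For context, lars_path computes the whole regularization path at once: it returns the alpha value at each knot, the indices of active features, and a (n_features, n_knots) coefficient matrix. A quick sketch on made-up data:

import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(42)
X_demo = rng.randn(50, 10)
y_demo = X_demo[:, 0] - 2 * X_demo[:, 1] + 0.01 * rng.randn(50)

alphas_demo, active_demo, path_demo = linear_model.lars_path(
    X_demo, y_demo, method='lasso')
print(alphas_demo.shape)  # (n_knots,): alphas decrease along the path
print(path_demo.shape)    # (n_features, n_knots): one coefficient column per knot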
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    X = diabetes.data
    Y = np.vstack([diabetes.target, diabetes.target ** 2]).T
    n_targets = Y.shape[1]
    for estimator in (linear_model.LassoLars(), linear_model.Lars()):
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        Y_dec = assert_warns(DeprecationWarning, estimator.decision_function, X)
        assert_array_almost_equal(Y_pred, Y_dec)
        alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                      estimator.coef_, estimator.coef_path_)
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)
def train(self):
    """Fit a LassoLars model on the outlier-truncated training data."""
    start = time.time()
    print('size before truncating outliers is %d ' % len(self.TrainData))
    TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) &
                               (self.TrainData['logerror'] < self._up)]
    print('size after truncating outliers is %d ' % len(TrainData))
    # Re-center the geographic coordinates.
    TrainData['longitude'] -= -118600000
    TrainData['latitude'] -= 34220000
    #extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')
    #self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis=1)], axis=1)
    # Build the training matrix from the truncated frame.
    X = TrainData.drop(self._l_drop_cols, axis=1)
    Y = TrainData['logerror']
    self._l_train_columns = X.columns
    X = X.values.astype(np.float32, copy=False)
    lr = LassoLars(alpha=self._lr_alpha, max_iter=self._lr_iter, verbose=True)
    self._model = lr.fit(X, Y)
    end = time.time()
    print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))
    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                        datetime.now().strftime('%Y%m%d-%H:%M:%S'))
    #with open(self._f_eval_train_model, 'wb') as o_file:
    #    pickle.dump(self._model, o_file, -1)
    #self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
    #                           ignore_index=True)  # ignore_index resets the index so indices don't overlap
    return
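The truncation bounds self._low and self._up are set elsewhere in the class; a common way to pick them (an assumption here, not taken from the repository) is quantile clipping of the target:

import pandas as pd

# Hypothetical frame standing in for the competition data; the 1%/99%
# quantile levels are illustrative, not the project's actual choice.
train_df = pd.DataFrame({'logerror': [0.01, -0.02, 0.5, -0.7, 0.03]})
low = train_df['logerror'].quantile(0.01)   # lower truncation bound
up = train_df['logerror'].quantile(0.99)    # upper truncation bound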
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html
    # command 0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 3, "eta": 0.1,
                      "min_child_weight": 5, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10, "eta": 0.1,
                      "min_child_weight": 8, "subsample": 0.5, "nthread": 4,
                      "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    models = [
        #DLModel(conf),
        #LRModel(conf, model=linear_model.BayesianRidge()),
        #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
        #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #                              ('linear', LinearRegression(fit_intercept=False))])),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
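For reference, AlphaSelection (from Yellowbrick) wraps a *CV regressor and plots cross-validated error against the alpha grid, which is why the test rejects the non-CV variants. A hedged usage sketch (Yellowbrick 0.x API, where poof() renders the figure; the data is made up):

import numpy as np
from sklearn.linear_model import LassoCV
from yellowbrick.regressor import AlphaSelection

rng = np.random.RandomState(0)
X_demo = rng.randn(200, 10)
y_demo = X_demo[:, 0] + 0.1 * rng.randn(200)

viz = AlphaSelection(LassoCV())
viz.fit(X_demo, y_demo)
viz.poof()  # draws the error-vs-alpha curve with the chosen alpha marked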
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Ridge")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
def compute_bench(alpha, n_samples, n_features, precompute):
    lasso_results = []
    lars_lasso_results = []
    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %d of %d' % (it,
                                          len(n_samples) * len(n_features)))
            print('==================')
            n_informative = nf // 10
            X, Y, coef_ = make_regression(n_samples=ns, n_features=nf,
                                          n_informative=n_informative,
                                          noise=0.1, coef=True)
            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data
            gc.collect()
            print("- benchmarking Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False,
                        precompute=precompute)
            tstart = time()
            clf.fit(X, Y)
            lasso_results.append(time() - tstart)
            gc.collect()
            print("- benchmarking LassoLars")
            clf = LassoLars(alpha=alpha, fit_intercept=False,
                            normalize=False, precompute=precompute)
            tstart = time()
            clf.fit(X, Y)
            lars_lasso_results.append(time() - tstart)
    return lasso_results, lars_lasso_results
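An illustrative driver for compute_bench (the grids below are assumptions chosen to run quickly; the function relies on gc, time.time, numpy, and scikit-learn's Lasso, LassoLars, and make_regression being imported):

import gc
from time import time

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso, LassoLars

lasso_times, lars_times = compute_bench(alpha=0.01,
                                        n_samples=[100, 500],
                                        n_features=[50, 100],
                                        precompute=True)
print('Lasso timings:     %s' % lasso_times)
print('LassoLars timings: %s' % lars_times)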
def test_lars_lstsq():
    # Test that Lars gives least square solution at the end
    # of the path
    X1 = 3 * diabetes.data  # use un-normalized dataset
    clf = linear_model.LassoLars(alpha=0.)
    clf.fit(X1, y)
    coef_lstsq = np.linalg.lstsq(X1, y)[0]
    assert_array_almost_equal(clf.coef_, coef_lstsq)
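With alpha=0 the L1 penalty vanishes, so the end of the LARS path must coincide with the ordinary least-squares solution. A standalone version of the same check on synthetic data (a sketch; the design and noise level are assumptions):

import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 5)
y_demo = X_demo.dot([1., 2., 0., 0., -1.]) + 0.01 * rng.randn(100)

clf = linear_model.LassoLars(alpha=0., fit_intercept=False)
clf.fit(X_demo, y_demo)
coef_lstsq = np.linalg.lstsq(X_demo, y_demo, rcond=None)[0]
print(np.allclose(clf.coef_, coef_lstsq, atol=1e-6))  # expect True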
def test_lasso_lars_vs_lasso_cd(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results.
    X = 3 * diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
                                  normalize=False).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)
    # same test, with normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
                                  tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers.
    # Note it used to be the case that Lars had to use the drop-for-good
    # strategy for this, but this is no longer the case with the
    # equality_tolerance checks.
    X = [[1e20, 1e20, 0],
         [-1e-32, 0, 0],
         [1, 1, 1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
                + alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)
    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False)
    cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)
    assert_less(lars_obj, cd_obj * (1. + 1e-8))
def getSKLearnModel(modelName):
    if modelName == 'LinearRegression':
        model = linear_model.LinearRegression()
    elif modelName == 'BayesianRidge':
        model = linear_model.BayesianRidge()
    elif modelName == 'ARDRegression':
        model = linear_model.ARDRegression()
    elif modelName == 'ElasticNet':
        model = linear_model.ElasticNet()
    elif modelName == 'HuberRegressor':
        model = linear_model.HuberRegressor()
    elif modelName == 'Lasso':
        model = linear_model.Lasso()
    elif modelName == 'LassoLars':
        model = linear_model.LassoLars()
    elif modelName == 'Ridge':
        model = linear_model.Ridge()
    elif modelName == 'SGDRegressor':
        model = linear_model.SGDRegressor()
    elif modelName == 'SVR':
        model = SVR()
    elif modelName == 'MLPClassifier':
        model = MLPClassifier()
    elif modelName == 'KNeighborsClassifier':
        model = KNeighborsClassifier()
    elif modelName == 'SVC':
        model = SVC()
    elif modelName == 'GaussianProcessClassifier':
        model = GaussianProcessClassifier()
    elif modelName == 'DecisionTreeClassifier':
        model = DecisionTreeClassifier()
    elif modelName == 'RandomForestClassifier':
        model = RandomForestClassifier()
    elif modelName == 'AdaBoostClassifier':
        model = AdaBoostClassifier()
    elif modelName == 'GaussianNB':
        model = GaussianNB()
    elif modelName == 'LogisticRegression':
        model = linear_model.LogisticRegression()
    elif modelName == 'QuadraticDiscriminantAnalysis':
        model = QuadraticDiscriminantAnalysis()
    else:
        # Fail fast instead of raising a NameError on the return below.
        raise ValueError('unknown model name: %s' % modelName)
    return model
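A minimal usage sketch for the lookup (the random data is illustrative):

import numpy as np

rng = np.random.RandomState(0)
X_train = rng.randn(100, 4)
y_train = rng.randn(100)

model = getSKLearnModel('LassoLars')   # unfitted estimator with defaults
model.fit(X_train, y_train)
print(model.coef_)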
def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when using the positive option.
    # This test is basically a copy of the above with the additional positive
    # option. However, for the middle part, the comparison of coefficient
    # values for a range of alphas, we had to make an adaptation. See below.
    # not normalized data
    X = 3 * diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
    # The range of alphas chosen for coefficient comparison here is restricted
    # as compared with the above test without the positive option. This is due
    # to the circumstance that the Lars-Lasso algorithm does not converge to
    # the least-squares solution for small alphas, see 'Least Angle Regression'
    # by Efron et al. 2004. The coefficients are typically in congruence up to
    # the smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(fit_intercept=False, alpha=alpha,
                                      normalize=False, positive=True).fit(X, y)
        clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8,
                                  normalize=False, positive=True).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)
    # normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
                                  tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T[:-1], alphas[:-1]):  # don't include alpha=0
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
def get_model_list(task_name):
    model_list, name_list = [], []
    model_list.append(linear_model.LinearRegression())
    name_list.append('LR')
    model_list.append(linear_model.SGDRegressor())
    name_list.append('LR_SGD')
    model_list.append(linear_model.Lasso(alpha=1.0))
    name_list.append('Lasso')
    model_list.append(linear_model.Ridge(alpha=1.0))
    name_list.append('Ridge')
    model_list.append(linear_model.LassoLars(alpha=.1))
    name_list.append('LassoLars')
    model_list.append(linear_model.BayesianRidge())
    name_list.append('BayesianRidge')
    model_list.append(KernelRidge(alpha=1.0))
    name_list.append('KernelRidge')
    model_list.append(gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1))
    name_list.append('GaussianProcess')
    model_list.append(KNeighborsRegressor(weights='uniform', n_neighbors=3))
    name_list.append('KNN_unif')
    model_list.append(KNeighborsRegressor(weights='distance', n_neighbors=3))
    name_list.append('KNN_dist')
    model_list.append(SVR(kernel='linear', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_linear')
    model_list.append(SVR(kernel='poly', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_poly')
    model_list.append(SVR(kernel='rbf', C=1, gamma='auto', coef0=0, degree=2))
    name_list.append('SVM_rbf')
    model_list.append(DecisionTreeRegressor())
    name_list.append('DT')
    model_list.append(RandomForestRegressor(n_estimators=100, max_depth=None, min_samples_split=2, random_state=0))
    name_list.append('RF')
    model_list.append(ExtraTreesRegressor(n_estimators=100, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    name_list.append('ET')
    return model_list, name_list
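One way to consume the paired lists (a sketch; the scoring metric and synthetic data are assumptions, and note that several entries, e.g. gaussian_process.GaussianProcess, require the older scikit-learn releases this snippet was written against):

from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X_demo, y_demo = make_regression(n_samples=200, n_features=20,
                                 noise=0.1, random_state=0)
models, names = get_model_list('demo')
for model, name in zip(models, names):
    scores = cross_val_score(model, X_demo, y_demo, cv=3,
                             scoring='neg_mean_squared_error')
    print('%-15s mean CV MSE: %.4f' % (name, -scores.mean()))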