Example source code for Python's Lasso() class

main.py (project: OpenAPS, author: medicinexlab)
def lasso_regression_model(parameter_array):
    alpha_value = parameter_array[0] #the alpha value is the first element of the parameter array
    return linear_model.Lasso(alpha=alpha_value, fit_intercept=True, normalize=True, precompute=False, copy_X=True,
                                max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')
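A minimal usage sketch for the wrapper above, on synthetic data. Note that the normalize= argument was removed in scikit-learn 1.2, so on recent versions the same preprocessing needs an explicit StandardScaler instead.

import numpy as np
from sklearn import linear_model

model = lasso_regression_model([0.01])   # alpha = 0.01
X, y = np.random.rand(20, 3), np.random.rand(20)
model.fit(X, y)
print(model.coef_)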

modeltest.py (project: strategy, author: kanghua309)
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    def build_model(model_name):
        return model_name()

    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        print("Residual sum of squares: %f" % np.mean(resid ** 2))

        # residuals vs. predicted values; a visible pattern suggests model misspecification
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        plt.show()

        # Explained variance score: 1 is perfect prediction
        print('Variance score: %.2f' % model.score(TestX, TestY))

        # Jarque-Bera tests whether the residuals are normally distributed
        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # Breusch-Pagan tests for heteroskedasticity; newer statsmodels spells
        # it het_breuschpagan (very old releases used het_breushpagan)
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = smd.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        # Ljung-Box tests for autocorrelation in the residuals up to lag 10
        # (recent statsmodels returns a DataFrame rather than a tuple)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
regularize.py (project: DSI-personal-reference-kit, author: teb311)
def main(dataset_size, test_proportion):
    diabetes = load_diabetes()
    X = diabetes.data[:dataset_size]
    y = diabetes.target[:dataset_size]

    fig, ax_list = plt.subplots(3, 1, figsize=(8, 6))
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Ridge, ax=ax_list[0])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Lasso, ax=ax_list[1])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=LinearRegression, ax=ax_list[2])

    plt.tight_layout()
    plt.show()
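The helper plot_errors_by_lambda is not shown in this excerpt; the sketch below is one plausible implementation (the alpha grid and error metric are assumptions), sweeping the regularization strength and plotting train and test error on the given axis:

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_errors_by_lambda(X, y, test_proportion, regression_class, ax):
    # sweep the regularization strength and plot train/test MSE (assumed behavior)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_proportion)
    alphas = np.logspace(-3, 1, 20)
    train_err, test_err = [], []
    for alpha in alphas:
        try:
            model = regression_class(alpha=alpha)
        except TypeError:            # LinearRegression takes no alpha
            model = regression_class()
        model.fit(X_train, y_train)
        train_err.append(mean_squared_error(y_train, model.predict(X_train)))
        test_err.append(mean_squared_error(y_test, model.predict(X_test)))
    ax.plot(alphas, train_err, label='train')
    ax.plot(alphas, test_err, label='test')
    ax.set_xscale('log')
    ax.set_title(regression_class.__name__)
    ax.legend()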
__init__.py (project: mlprojects-py, author: srinathperera)
def vote_with_lr(conf, forecasts, best_model_index, y_actual):
    start = time.time()
    best_forecast = forecasts[:, best_model_index]
    forecasts = np.sort(np.delete(forecasts, best_model_index, axis=1), axis=1)
    forecasts = np.where(forecasts <=0, 0.1, forecasts)

    data_train = []

    for i in range(forecasts.shape[0]):
        f_row = forecasts[i, :]
        min_diff_to_best = np.min([cal_rmsle(best_forecast[i], f) for f in f_row])
        comb = list(itertools.combinations(f_row,2))
        avg_error = scipy.stats.hmean([cal_rmsle(x,y) for (x,y) in comb])
        data_train.append([min_diff_to_best, avg_error, scipy.stats.hmean(f_row), np.median(f_row), np.std(f_row)])


    X_all = np.column_stack([np.row_stack(data_train), best_forecast])
    if conf.target_as_log:
        y_actual = transfrom_to_log(y_actual)
    # use the first 25% of the data to train the ensemble and the rest for evaluation
    no_of_training_instances = int(round(len(y_actual)*0.25))
    X_train, X_test, y_train, y_test = train_test_split(no_of_training_instances, X_all, y_actual)
    y_actual_test = y_actual[no_of_training_instances:]

    lr_model = linear_model.Lasso(alpha=0.2)
    lr_model.fit(X_train, y_train)
    lr_forecast = lr_model.predict(X_test)
    lr_forecast_reversed = retransfrom_from_log(lr_forecast)
    calculate_accuracy("vote__lr_forecast " + str(conf.command), y_actual_test, lr_forecast_reversed)
    print_time_took(start, "vote_with_lr")
    return lr_forecast_reversed
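Several helpers above (cal_rmsle, transfrom_to_log, retransfrom_from_log, calculate_accuracy, train_test_split) are defined elsewhere in the project; as one example, a plausible sketch of the RMSLE helper used in the pairwise comparisons (an assumption, not the project's actual code):

import numpy as np

def cal_rmsle(y_true, y_pred):
    # root mean squared logarithmic error; on the scalar inputs used above this
    # reduces to the absolute difference of the log1p-transformed values
    return np.sqrt(np.mean((np.log1p(y_true) - np.log1p(y_pred)) ** 2))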
__init__.py (project: mlprojects-py, author: srinathperera)
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit

    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":3, "eta":0.1, "min_child_weight":5,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}
    else:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    #xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
    #    "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    models = [
                #DLModel(conf),

                #LRModel(conf, model=linear_model.BayesianRidge()),
                #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.1)),
                #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
                #LRModel(conf, model=linear_model.Ridge (alpha = .5))
                #   ('linear', LinearRegression(fit_intercept=False))])),
                XGBoostModel(conf, xgb_params, use_cv=True),
                LRModel(conf, model=linear_model.Lasso(alpha = 0.3)),
                RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.2)),
                ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
                #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
              ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
advanced_supvervised_model_trainer.py (project: healthcareai-py, author: HealthCatalyst)
def lasso_regression(self, scoring_metric='neg_mean_squared_error',
                         hyperparameter_grid=None,
                         randomized_search=True,
                         number_iteration_samples=2):
        """
        A light wrapper for scikit-learn's lasso regression that performs randomized search over an overridable
        default hyperparameter grid.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for regression
            hyperparameter_grid (dict): hyperparameters by name
            randomized_search (bool): True for randomized search (default)
            number_iteration_samples (int): Number of models to train during the randomized search for exploring the
                hyperparameter space. More may lead to a better model, but will take longer.

        Returns:
            TrainedSupervisedModel:
        """
        self.validate_regression('Lasso Regression')
        if hyperparameter_grid is None:
            hyperparameter_grid = {"fit_intercept": [True, False]}
            number_iteration_samples = 2

        algorithm = get_algorithm(Lasso,
                                  scoring_metric,
                                  hyperparameter_grid,
                                  randomized_search,
                                  number_iteration_samples=number_iteration_samples)

        trained_supervised_model = self._create_trained_supervised_model(algorithm)

        return trained_supervised_model
friedman_memory.py (project: mlens, author: flennerhag)
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)

    est = [ElasticNet(copy_X=False),
           Lasso(copy_X=False)]

    ens.add(est)
    ens.add(KNeighborsRegressor())

    return ens
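A hedged usage sketch for the ensemble factory above (folds is one of SuperLearner's keyword arguments; the data here is synthetic):

import numpy as np

X, y = np.random.rand(100, 5), np.random.rand(100)
ens = build_ensemble(folds=2)
ens.fit(X, y)
preds = ens.predict(X)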
friedman_memory.py (project: mlens, author: flennerhag)
def lasso():
    """Fit Lasso."""
    print("Fitting LAS...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    ls = Lasso()
    ls.fit(X, y)
    print_time(t0, "Done", end="")
friedman_memory.py (project: mlens, author: flennerhag)
def elasticnet():
    """Fit Elastic Net."""
    print("Fitting ELN...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    # the original constructed Lasso() here, which looks like a copy-paste slip
    eln = ElasticNet()
    eln.fit(X, y)
    print_time(t0, "Done", end="")
scale_ens.py (project: mlens, author: flennerhag)
def build_ensemble(kls, **kwargs):
    """Generate ensemble of class kls."""

    ens = kls(**kwargs)
    ens.add([SVR(), RandomForestRegressor(),
             GradientBoostingRegressor(), Lasso(copy_X=False),
             MLPRegressor(shuffle=False, alpha=0.001)])
    ens.add_meta(Lasso(copy_X=False))
    return ens
tbs_ml.py (project: eezzy, author: 3Blades)
def spot_check(X, y, model_type='regression'):
    # the original tested `if type == 'regression':`, comparing the builtin
    # `type` to a string (always False); a parameter is the likely intent
    if model_type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)  # average cross-validation score
        scores.append(score)

    for (_, name), score in zip(models, scores):
        print('%s: %f' % (name, score))
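check_model is not shown in this excerpt; a plausible sketch under the assumption that it averages a k-fold cross-validation score:

import numpy as np
from sklearn.model_selection import cross_val_score

def check_model(model, splits, X, y):
    # mean score over `splits` cross-validation folds (assumed behavior)
    return np.mean(cross_val_score(model, X, y, cv=splits))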
model.py (project: poormining, author: bowenpay)
def get_classifier(self, X, Y):
        """Train a Lasso model (note: Lasso is a regressor, despite the method name).
        :param X: training data
        :param Y: training labels
        :return: fitted model
        """
        clf = Lasso()
        clf.fit(X, Y)
        return clf
regressor.py (project: EarlyWarning, author: wjlei1990)
def train_lasso_model(_train_x, train_y, _predict_x):
    print_title("Lasso Regressor")

    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)

    reg = linear_model.LassoCV(
        precompute=True, cv=5, verbose=1, n_jobs=4)
    reg.fit(train_x, train_y)
    print("alphas: %s" % reg.alphas_)
    print("mse path: %s" % np.mean(reg.mse_path_, axis=1))

    itemindex = np.where(reg.alphas_ == reg.alpha_)
    print("itemindex: %s" % itemindex)
    _mse = np.mean(reg.mse_path_[itemindex[0], :])
    print("Best alpha using bulit-in LassoCV: %f(mse: %f)" %
          (reg.alpha_, _mse))

    alpha = reg.alpha_
    reg = linear_model.Lasso(alpha=alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zeros coef: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)

    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
utils.py (project: kaggle-house-price, author: motakbiri)
def lasso(train, test, label, alpha=0.00099, max_iteration=50000):
    lasso = Lasso(alpha=alpha, max_iter=max_iteration)
    lasso.fit(train, label)

    # RMSE on the training data
    y_prediction = lasso.predict(train)
    print("Lasso RMSE on training set: ", rmse(label, y_prediction))

    # the model was trained on a log target, so map test predictions back with exp
    y_prediction = lasso.predict(test)
    y_prediction = np.exp(y_prediction)
    return y_prediction
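A minimal hedged usage sketch on synthetic data; the rmse helper is assumed, since it is not part of the excerpt:

import numpy as np
from sklearn.linear_model import Lasso

def rmse(y_true, y_pred):  # helper assumed by the snippet above
    return np.sqrt(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))

train = np.random.rand(50, 3)
test = np.random.rand(10, 3)
prices = 1e4 + 1e5 * np.random.rand(50)          # synthetic raw prices
predictions = lasso(train, test, np.log(prices))  # train on the log target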
test_alphas.py (project: yellowbrick, author: DistrictDataLabs)
def test_regressor_cv(self):
        """
        Ensure only "CV" regressors are allowed
        """

        for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
            with self.assertRaises(YellowbrickTypeError):
                alphas = AlphaSelection(model())

        for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
            try:
                alphas = AlphaSelection(model())
            except YellowbrickTypeError:
                self.fail("could not instantiate RegressorCV on alpha selection")
pipeline.py (project: clue-hackathon, author: adrinjalali)
def run():
    data = load_binary()

    # Extract features
    user_feat_matrix = process_level2(data)  # X

    del user_feat_matrix['X']['user_id']
    X = user_feat_matrix['X'].values
    X[np.isnan(X)] = 0
    Y = user_feat_matrix['Y']
    Y.fillna(0, inplace=True)
    del user_feat_matrix['X_all']['user_id']
    X_all = user_feat_matrix['X_all'].values
    X_all[np.isnan(X_all)] = 0

    cols = list(Y.columns.values)
    symptoms = ['happy', 'pms', 'sad', 'sensitive_emotion', 'energized', 'exhausted',
                'high_energy', 'low_energy', 'cramps', 'headache', 'ovulation_pain',
                'tender_breasts', 'acne_skin', 'good_skin', 'oily_skin', 'dry_skin']
    with open("result.txt", 'w') as f:
        f.write("user_id,day_in_cycle,symptom,probability\n")

    for symptom in symptoms:
        print(symptom)

        pipeline = Pipeline([
            ('remove_low_variance_features', VarianceThreshold(threshold=0.0)),
            #('standard_scale', StandardScaler()),
            ('estimator', Lasso()),
        ])

        param_grid = {'estimator__alpha': [.1, .3, .5, .7, .8]}
        model = GridSearchCV(pipeline, param_grid=param_grid, n_jobs=4,
                             verbose=2)
        # the original called model.fit(X, s_Y.values) with s_Y undefined;
        # fitting on the current symptom's column of Y is the likely intent
        model.fit(X, Y[symptom].values)

        print("dumping...")
        data_dir = 'data'
        cycles0 = pd.read_csv(join(data_dir, 'cycles0.csv'))
        c_length = {k:v for k,v in zip(cycles0.user_id.values, cycles0.expected_cycle_length)}
        dump(symptom, model, X_all, c_length, data['users'].user_id)
linvpy.py (project: linvpy, author: LCAV)
def estimate(self, a, y, initial_x=None):
        """
        :param a: MxN matrix A in the y=Ax equation
        :type a: numpy.ndarray
        :param y: M vector y in the y=Ax equation
        :type y: numpy.ndarray
        :param initial_x: N vector of an initial solution
        :type initial_x: numpy.ndarray
        :return: best estimation of the N vector x in the y=Ax equation
        :rtype: numpy.ndarray

        :Example:

        >>> import numpy as np
        >>> import linvpy as lp

        >>> a = np.matrix([[1, 2], [3, 4], [5, 6]])
        >>> y = np.array([1, 2, 3])

        >>> m = lp.MEstimator()
        >>> m.estimate(a,y)
        array([ -2.95552481e-16,   5.00000000e-01])

        >>> m_ = lp.MEstimator(loss_function=lp.Bisquare, clipping=2.23, \
        regularization=lp.Lasso(), lamb=3)
        >>> initial_solution = np.array([1, 2])
        >>> m_.estimate(a, y, initial_x=initial_solution)
        array([ 0.,  0.])
        """
        return self.irls(a, y, initial_x)
model.py (project: sportsball, author: jgershen)
def build_model(train_file, attr_file, model_out, algorithm='ridge'):
  classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
  if algorithm not in classifiers:
    raise NotImplementedError("only implemented algorithms: " + str(classifiers))

  train_data = pd.read_pickle(train_file)

  attrs = read_attrs(attr_file)
  target_attr = attrs[0]
  usable_attrs = attrs[1:]

  if algorithm == 'ridge':
    clf = Ridge()
  elif algorithm == 'linear':
    clf = LinearRegression()
  elif algorithm == 'lasso':
    clf = Lasso()
  elif algorithm == 'en':
    clf = ElasticNet()
  else:
    clf = RandomForestRegressor()

  logger.debug("Modeling '%s'", target_attr)
  logger.debug("    train set (%d): %s", len(train_data), train_file)
  logger.debug("  Algorithm: %s", algorithm)
  clf.fit(train_data[usable_attrs], train_data[target_attr])
  # coef_ only exists after fitting; the original checked it before calling
  # fit, so the coefficient logging never ran
  if hasattr(clf, 'coef_'):
    logger.debug('Coefficients:')
    for i, c in enumerate(clf.coef_):
      logger.debug('    %-20s: %20.4f', usable_attrs[i], c)

  pickle.dump(clf, open(model_out, 'wb'))
kgrid_r0.py (project: jamespy_py3, author: jskDr)
def gs_Lasso(xM, yV, alphas_log=(-1, 1, 9), n_splits=5, n_jobs=-1):

    print(xM.shape, yV.shape)

    clf = linear_model.Lasso()
    params = {'alpha': np.logspace(*alphas_log)}
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf5_c, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs
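A minimal hedged usage sketch (synthetic data; assumes the snippet's linear_model, model_selection, and np imports):

import numpy as np

xM = np.random.rand(60, 4)
yV = np.random.rand(60)
gs = gs_Lasso(xM, yV, alphas_log=(-2, 1, 7))
print(gs.best_params_, gs.best_score_)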
kgrid_r0.py (project: jamespy_py3, author: jskDr)
def gs_Lasso_norm(xM, yV, alphas_log=(-1, 1, 9)):

    print(xM.shape, yV.shape)

    # note: normalize= was removed from scikit-learn in 1.2; on recent
    # versions, pipeline Lasso with a StandardScaler instead
    clf = linear_model.Lasso(normalize=True)
    params = {'alpha': np.logspace(*alphas_log)}
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf5_c, n_jobs=-1)

    gs.fit(xM, yV)

    return gs

