def add_new_weak_learner(self):
    '''
    Summary:
        Adds a new function, h, to self.weak_learners by fitting one additive
        regression tree to the TD residuals of the most recent episode, i.e.
        solving Eq. 1:

        [Eq. 1] h = argmin_h sum_i ( r_i + gamma * max_b Q_A(s'_i, b) - (Q_A(s_i, a_i) + h(s_i, a_i)) )^2
    '''
    if len(self.most_recent_episode) == 0:
        # If this episode contains no data, don't do anything.
        return

    # Build arrays of (state, action) features and TD-error targets.
    data = np.zeros((len(self.most_recent_episode), self.max_state_features + 1))
    total_loss = np.zeros(len(self.most_recent_episode))

    for i, experience in enumerate(self.most_recent_episode):
        # Grab the experience.
        s, a, r, s_prime = experience

        # Pad in case the state features are too short (as in Atari sometimes).
        features = self._pad_features_with_zeros(s, a)
        loss = r + self.gamma * self.get_max_q_value(s_prime) - self.get_q_value(s, a)

        # Add to the relevant arrays.
        data[i] = features
        total_loss[i] = loss

    # Fit a new regressor and add it to the weak learners.
    # ('ls' is spelled 'squared_error' in scikit-learn >= 1.2.)
    estimator = GradientBoostingRegressor(loss='ls', n_estimators=1, max_depth=self.max_depth)
    estimator.fit(data, total_loss)
    self.weak_learners.append(estimator)
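Read in isolation, the fit in Eq. 1 is just a one-tree regression on TD residuals. The following standalone sketch illustrates that step; the feature values, rewards, Q estimates, and gamma below are illustrative assumptions, not part of the agent:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Hypothetical toy data: each row is a padded (state, action) feature vector.
features = np.array([[0.1, 0.2, 1.0],
                     [0.4, 0.1, 0.0],
                     [0.9, 0.7, 1.0]])
gamma = 0.95
rewards = np.array([0.0, 1.0, 0.0])
q_sa = np.array([0.2, 0.5, 0.1])          # current Q_A(s, a) estimates
max_q_sprime = np.array([0.3, 0.0, 0.4])  # max_b Q_A(s', b) estimates

# Regression targets from Eq. 1: the TD residuals.
td_residuals = rewards + gamma * max_q_sprime - q_sa

# One boosting stage (a single regression tree) fit to the residuals;
# the default squared-error loss matches loss='ls' in the snippet above.
h = GradientBoostingRegressor(n_estimators=1, max_depth=3)
h.fit(features, td_residuals)
print(h.predict(features))  # correction added to the current Q estimates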
def score(self, estimator, X, y, took_log_of_y=False, advanced_scoring=False, verbose=2, name=None):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    if isinstance(estimator, GradientBoostingRegressor):
        # Densify sparse input before predicting with GradientBoostingRegressor.
        X = X.toarray()

    predictions = estimator.predict(X)

    if took_log_of_y:
        # Undo the earlier log transform so scoring happens on the original scale.
        for idx, val in enumerate(predictions):
            predictions[idx] = math.exp(val)

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        # bad_vals_as_strings: module-level collection of string forms of null/infinite values (defined elsewhere).
        bad_val_indices = []
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings:
                bad_val_indices.append(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the y values. We will ignore these, and report the score on the rest of the dataset')
        score = self.scoring_func(y, predictions)

    if advanced_scoring == True:
        if hasattr(estimator, 'name'):
            print(estimator.name)
        advanced_scoring_regressors(predictions, y, verbose=verbose, name=name)

    return -1 * score
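The trailing negation follows the same idea as scikit-learn's neg_* scorers: an error metric is flipped so that a loop that maximizes the returned score still prefers the lower error. A minimal sketch, assuming mean squared error stands in for self.scoring_func:

from sklearn.metrics import mean_squared_error

def neg_mse(y_true, y_pred):
    # Lower error -> larger (less negative) score, so "maximize" picks the better model.
    return -1 * mean_squared_error(y_true, y_pred)

print(neg_mse([3.0, 2.0], [2.5, 2.0]))  # -0.125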
From gradientboostingmodel.py in the Supply-demand-forecasting project (author: LevinJ):
def setClf(self):
    self.clf = GradientBoostingRegressor(n_estimators=100, verbose=100)
    # self.clf = GradientBoostingRegressor(loss='ls', verbose=300, n_estimators=70, learning_rate=0.1, subsample=1.0, max_features=1.0)
    return
def __init__(self, info, verbose=True, debug_mode=False):
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    self.postprocessor = None
    #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba

    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return

    if info['task'] == 'regression':
        if info['is_sparse'] == True:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, max_depth=4, min_samples_split=14, verbose=verbose, warm_start=True)
        self.predict_method = self.model.predict # For regression, predict values directly
    else:
        if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start=True)")
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
def run_batch(batch):
    for num_iters, params in batch:
        max_depth = params['max_depth']
        learning_rate = params['learning_rate']
        num_iters = int(num_iters)
        reg = GradientBoostingRegressor(
            learning_rate=learning_rate,
            max_depth=max_depth,
            n_estimators=num_iters)
        reg.fit(X_train, y_train)
        mse = ((reg.predict(X_test) - y_test) ** 2).mean()
        yield mse
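A hedged usage sketch for run_batch; the synthetic data and the parameter batch below are assumptions (the original relies on module-level X_train, y_train, X_test, y_test already existing):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Hypothetical module-level arrays that run_batch expects to find.
X, y = make_regression(n_samples=500, n_features=8, noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Each batch entry is (num_iters, params), matching the loop above.
batch = [(50, {'max_depth': 2, 'learning_rate': 0.1}),
         (200, {'max_depth': 3, 'learning_rate': 0.05})]

for mse in run_batch(batch):
    print('test MSE:', mse)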
def greedy_elim(df):
    # Do feature selection using recursive feature elimination (RFE).
    X = df[[x for x in df.columns if x != 'SalePrice']]
    y = df['SalePrice']

    #model = RandomForestRegressor(n_estimators=50)
    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)

    # 150 features seems to work best at the moment; why is unclear.
    feat_selector = RFE(estimator=model, step=1, n_features_to_select=150)

    # Find all relevant features.
    feat_selector.fit_transform(X.values, y.values)

    # Check selected features.
    features_bool = np.array(feat_selector.support_)
    features = np.array(X.columns)
    result = features[features_bool]
    #print(result)

    # Check ranking of features.
    features_rank = feat_selector.ranking_
    #print(features_rank)
    rank = features_rank[features_bool]
    #print(rank)
    return result
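Assuming greedy_elim and its dependencies (numpy, RFE, GradientBoostingRegressor) are in scope, a usage sketch might look like the following; the wide synthetic DataFrame is illustrative and only exists so that n_features_to_select=150 inside the function is meaningful:

import numpy as np
import pandas as pd

# Hypothetical wide frame: 200 numeric features plus the SalePrice target.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.rand(100, 200), columns=['feat_%d' % i for i in range(200)])
df['SalePrice'] = rng.rand(100) * 100000

selected = greedy_elim(df)                       # column names kept by RFE
df_reduced = df[list(selected) + ['SalePrice']]  # reduced training frame
print(len(selected), 'features selected')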
def convert(model, input_features, output_features):
    """Convert a boosted tree model to protobuf format.

    Parameters
    ----------
    model : GradientBoostingRegressor
        A trained scikit-learn boosted tree model.
    input_features : [str]
        Names of the input columns.
    output_features : str
        Name of the output column.

    Returns
    -------
    model_spec : An object of type Model_pb.
        Protobuf representation of the model.
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
    _sklearn_util.check_expected_type(model, _ensemble.GradientBoostingRegressor)

    def is_gbr_model(m):
        # Check for the fitted-model attribute first so unfitted models return False
        # instead of raising AttributeError.
        if hasattr(m, 'estimators_') and m.estimators_ is not None:
            if len(m.estimators_) == 0:
                return False
            for t in m.estimators_.flatten():
                if not hasattr(t, 'tree_') or t.tree_ is None:
                    return False
            return True
        else:
            return False

    _sklearn_util.check_fitted(model, is_gbr_model)
    # Note: model.init_.mean assumes an older scikit-learn where the default
    # init estimator exposes a .mean attribute.
    base_prediction = model.init_.mean
    return _MLModel(_convert_tree_ensemble(model, input_features, output_features,
                                           base_prediction=base_prediction))
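For context, a hedged end-to-end sketch using coremltools' public scikit-learn entry point, which dispatches to an internal converter like the one above; the feature names are illustrative, and the exact keyword arguments and supported scikit-learn versions depend on the coremltools release:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
import coremltools

# Illustrative training data and feature names.
X, y = make_regression(n_samples=200, n_features=3, random_state=0)
model = GradientBoostingRegressor(n_estimators=10, random_state=0)
model.fit(X, y)

# Public converter; assumed keyword names, treat as a sketch.
mlmodel = coremltools.converters.sklearn.convert(
    model, input_features=['f0', 'f1', 'f2'], output_feature_names='target')
mlmodel.save('GradientBoostingRegressor.mlmodel')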
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    def build_model(model_name):
        model = model_name()
        return model

    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print(resid)
        print("Residual sum of squares: %f" % np.mean(resid ** 2))
        #print(model.predict(TestX))
        #print(TestY)

        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1, X2, X3, X4)))
        xs_with_constant = sms.add_constant(TestX)
        # Spelled het_breushpagan in older statsmodels releases.
        _, pvalue1, _, _ = smd.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        # Note: newer statsmodels returns a DataFrame here; use ljung_box['lb_pvalue'] in that case.
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        #print("Lagrange Multiplier Statistics:", ljung_box[0])
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def fit_boosting(X, y, window=100000, estimators=250, learning=.01,
                 samples_leaf=500, depth=20, validate=False):
    '''
    Fits a gradient boosting regressor; if validate is True, defers to the
    project's cross_validate helper instead of returning a fitted model.
    '''
    model = GradientBoostingRegressor(n_estimators=estimators,
                                      learning_rate=learning,
                                      min_samples_leaf=samples_leaf,
                                      max_depth=depth,
                                      random_state=42)
    if validate:
        return cross_validate(X, y, model, window)
    return model.fit(X, y)
def __init__(self, **kwargs):
    #print("kwargs=", kwargs)
    self.is_boxcox = kwargs.get("is_boxcox", False)
    self.boxcox_lambda = kwargs.get("boxcox_lambda", 0.0)
    self.Model = kwargs.get("model", GradientBoostingRegressor)

    if "is_boxcox" in kwargs:
        kwargs.pop("is_boxcox")
    if "boxcox_lambda" in kwargs:
        kwargs.pop("boxcox_lambda")
    if "model" in kwargs:
        kwargs.pop("model")

    self.clf = self.Model(**kwargs)
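A construction sketch for this wrapper. Only __init__ appears above, so the class name BoxCoxRegressor below is hypothetical and its body condenses the snippet; the point is that is_boxcox, boxcox_lambda, and model are consumed by the wrapper while everything else is forwarded to the wrapped estimator:

from sklearn.ensemble import GradientBoostingRegressor

class BoxCoxRegressor:
    # Hypothetical class name; condensed version of the __init__ above.
    def __init__(self, **kwargs):
        self.is_boxcox = kwargs.pop("is_boxcox", False)
        self.boxcox_lambda = kwargs.pop("boxcox_lambda", 0.0)  # lambda 0.0 corresponds to a log transform
        self.Model = kwargs.pop("model", GradientBoostingRegressor)
        self.clf = self.Model(**kwargs)  # remaining kwargs go to the wrapped estimator

wrapper = BoxCoxRegressor(is_boxcox=True,
                          boxcox_lambda=0.0,
                          model=GradientBoostingRegressor,
                          n_estimators=200, max_depth=3)
print(type(wrapper.clf).__name__)  # GradientBoostingRegressor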
def gradient_boost_grid_search():
    gradient_boost_grid = {
        'loss': ['ls', 'lad', 'huber', 'quantile'],
        'learning_rate': [.0001, .001, .01, .1, 1],
        'n_estimators': [50, 100, 1000, 10000],
        'max_depth': [1, 3],
        'min_samples_split': [2, 4, 10],
        'max_features': ['sqrt', 'log2'],
    }
    gb = GradientBoostingRegressor()
    return gradient_boost_grid, gb
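The returned pair plugs directly into GridSearchCV. A minimal sketch with synthetic data follows; note that newer scikit-learn spells these losses 'squared_error' and 'absolute_error', and the full grid is thousands of fits, so the fit call is left commented out:

from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV

grid, gb = gradient_boost_grid_search()
X, y = make_regression(n_samples=300, n_features=10, noise=0.5, random_state=0)

search = GridSearchCV(estimator=gb, param_grid=grid, cv=3,
                      scoring='neg_mean_squared_error', n_jobs=-1)
# search.fit(X, y)          # commented out: the full grid is very expensive
# print(search.best_params_)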
def setUpClass(cls):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    if not HAS_SKLEARN:
        return

    # Note: load_boston was removed in scikit-learn 1.2.
    scikit_data = load_boston()
    scikit_model = GradientBoostingRegressor(random_state=1)
    scikit_model.fit(scikit_data['data'], scikit_data['target'])

    # Save the data and the model
    cls.scikit_data = scikit_data
    cls.scikit_model = scikit_model
def test_conversion_bad_inputs(self):
    # Error on converting an untrained model
    with self.assertRaises(Exception):
        model = GradientBoostingRegressor()
        spec = skl_converter.convert(model, 'data', 'out')

    # Check the expected class during conversion.
    with self.assertRaises(Exception):
        model = OneHotEncoder()
        spec = skl_converter.convert(model, 'data', 'out')
def test_conversion_bad_inputs(self):
    # Error on converting an untrained model
    with self.assertRaises(TypeError):
        model = GradientBoostingRegressor()
        spec = xgb_converter.convert(model, 'data', 'out')

    # Check the expected class during conversion
    with self.assertRaises(TypeError):
        model = OneHotEncoder()
        spec = xgb_converter.convert(model, 'data', 'out')
def test_model_select_by_param():
    iris = load_iris()
    gbdt = GradientBoostingRegressor()
    parameters = {'n_estimators': [1000, 5000], 'max_depth': [3, 4]}
    grid_search = GridSearchCV(estimator=gbdt, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(iris.data[:150], iris.target[:150])
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
def score(self, estimator, X, y, took_log_of_y=False, advanced_scoring=False, verbose=2, name=None):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    if isinstance(estimator, GradientBoostingRegressor):
        X = X.toarray()

    predictions = estimator.predict(X)

    if took_log_of_y:
        for idx, val in enumerate(predictions):
            predictions[idx] = math.exp(val)

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        bad_val_indices = []
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings or str(predictions[idx]) in bad_vals_as_strings:
                bad_val_indices.append(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the predicted or y values. We will ignore these, and report the score on the rest of the dataset')
        score = self.scoring_func(y, predictions)

    if advanced_scoring == True:
        if hasattr(estimator, 'name'):
            print(estimator.name)
        advanced_scoring_regressors(predictions, y, verbose=verbose, name=name)

    return -1 * score
def regression_with_GBR(X_train, y_train, X_test, y_test, parmsFromNormalization,
                        params={'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
                                'learning_rate': 0.01, 'loss': 'ls'}):
    # GradientBoostingRegressor (min_samples_split must be >= 2 in current scikit-learn).
    gfr = GradientBoostingRegressor(**params)
    gfr.fit(X_train, y_train)
    y_pred_gbr = gfr.predict(X_test)
    print_regression_model_summary("GBR", y_test, y_pred_gbr, parmsFromNormalization)
    print_feature_importance(X_test, y_test, gfr.feature_importances_)

    # Cross-validation (not sure this makes sense for regression):
    # http://scikit-learn.org/stable/modules/cross_validation.html
    #gfr = GradientBoostingRegressor(**params)
    #scores = cross_validation.cross_val_score(gfr, X_train, y_train, cv=5)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    return y_pred_gbr
def GDBT_regression(X=train_split, Y=y):
    est = GradientBoostingRegressor(n_estimators=75, max_depth=3, learning_rate=0.1)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    est.fit(X_train, Y_train)
    y_test_pred = est.predict(X_test)

    # Plot residuals on the held-out split
    plt.scatter(y_test_pred, y_test_pred - Y_test, c='blue', marker='s', label='error on test data')
    plt.title("Regression with GBDT")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Plot predictions
    plt.scatter(Y_test, y_test_pred, c="blue", marker="s", label="Test data")
    plt.title("Regression with GBDT")
    plt.xlabel("Real values")
    plt.ylabel("Predicted values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    print('rmsle value:', rmsle(Y_test, y_test_pred))
    return est

# linear_regression()
# ridge_regression()
# Lasso_regression()
#model = Elasticnet_regression()

# '''
# predict final result
# '''
#
#
# coefs, lasso = Lasso_regression()
# selected_features = coefs[coefs['value'] != 0].index.values
# train_new = train_split[selected_features]
def gbr(X, y):
    X_train, X_validation, y_train, y_validation = train_test_split(X, y, random_state=0)
    sklearn_boost = GradientBoostingRegressor(random_state=1)
    sklearn_boost.fit(X_train, y_train.ravel())
    print('training error:', 1.0 - sklearn_boost.score(X_train, y_train))
    print('validation error:', 1.0 - sklearn_boost.score(X_validation, y_validation))
    time_fit(sklearn_boost, X_train, y_train.ravel())