Example source code for Python's GradientBoostingClassifier() class
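The snippets below are collected from real open-source projects and show how sklearn.ensemble.GradientBoostingClassifier is constructed, configured, and fit in practice. As a baseline for reading them, here is a minimal self-contained sketch of the standard fit/predict workflow (the dataset and parameter values are illustrative only, not taken from any project below):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Toy data: any (n_samples, n_features) float array with integer labels works.
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))      # mean accuracy on the test split
print(clf.predict_proba(X_test[:5]))  # per-class probabilities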

Source: models.py (project: johnson-county-ddj-public, author: dssg)
def get_feature_importance(self, clf, model_name):
        # Map each model name to the attribute that exposes its learned feature weights.
        clfs = {'RandomForestClassifier': 'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()  # .tolist() already produces a list
        else:
            return None
Source: models.py (project: johnson-county-ddj-public, author: dssg)
def define_model(self, model, parameters, n_cores=0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
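The lookup-then-set_params pattern above also works outside a class; a minimal sketch, with illustrative parameter values:

from sklearn.ensemble import GradientBoostingClassifier

clfs = {'GradientBoostingClassifier': GradientBoostingClassifier(n_estimators=10)}
clf = clfs['GradientBoostingClassifier']
# set_params overrides any constructor argument by name; unknown names raise ValueError.
clf.set_params(learning_rate=0.05, subsample=0.5, max_depth=6)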
Source: gradient_boosting_blending.py (project: DataMiningCompetitionFirstPrize, author: lzddzh)
def learn(x, y, test_x):
    # Build one sample weight per training label.
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt_b)
        elif y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt_b)
        elif y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt_b)
        elif y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt_b)

    clf = GradientBoostingClassifier(loss='deviance', n_estimators=variables.n_estimators_gdbt_b,
                                     learning_rate=variables.learning_rate_gdbt_b,
                                     max_depth=variables.max_depth_gdbt_b, random_state=0,
                                     min_samples_split=variables.min_samples_split_gdbt_b,
                                     min_samples_leaf=variables.min_samples_leaf_gdbt_b,
                                     subsample=variables.subsample_gdbt_b,
                                     ).fit(x, y, sample_weight=weight_list)
    prediction_list = clf.predict(test_x)

    return prediction_list
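The per-class weight loop above can be written more compactly with a dict lookup; a sketch assuming the same string labels and variables module:

weight_map = {"0": variables.weight_0_gdbt_b,
              "1000": variables.weight_1000_gdbt_b,
              "1500": variables.weight_1500_gdbt_b,
              "2000": variables.weight_2000_gdbt_b}
weight_list = [weight_map[label] for label in y]  # KeyError on an unexpected label, instead of a silently shorter list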
Source: models_classification.py (project: easyML, author: aarshayj)
def __init__(
        self, data_block, predictors=[],cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=[]):

        base_classification.__init__(
            self, alg=GradientBoostingClassifier(), data_block=data_block, 
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric, 
            additional_display_metrics=additional_display_metrics
            )

        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
Source: GradientBoosting.py (project: SecuML, author: ANSSI-FR)
def createPipeline(self):
        self.pipeline = Pipeline([
            ('model', GradientBoostingClassifier(
                loss = self.conf.loss,
                learning_rate = self.conf.learning_rate,
                n_estimators = self.conf.n_estimators,
                criterion = self.conf.criterion,
                max_depth = self.conf.max_depth,
                min_samples_split = self.conf.min_samples_split,
                min_samples_leaf = self.conf.min_samples_leaf,
                min_weight_fraction_leaf = self.conf.min_weight_fraction_leaf,
                subsample = self.conf.subsample,
                max_features = self.conf.max_features,
                max_leaf_nodes = self.conf.max_leaf_nodes,
                min_impurity_split = self.conf.min_impurity_decrease,  # the conf's newer name feeds sklearn's older argument
                presort = self.conf.presort))])
Source: SentiCR.py (project: SentiCR, author: senticr)
def get_classifier(self):
        algo = self.algo

        if algo == "GBT":
            return GradientBoostingClassifier()
        elif algo == "RF":
            return RandomForestClassifier()
        elif algo == "ADB":
            return AdaBoostClassifier()
        elif algo == "DT":
            return DecisionTreeClassifier()
        elif algo == "NB":
            return BernoulliNB()
        elif algo == "SGD":
            return SGDClassifier()
        elif algo == "SVC":
            return LinearSVC()
        elif algo == "MLPC":
            return MLPClassifier(activation='logistic', batch_size='auto',
                                 early_stopping=True, hidden_layer_sizes=(100,),
                                 learning_rate='adaptive', learning_rate_init=0.1,
                                 max_iter=5000, random_state=1, solver='lbfgs',
                                 tol=0.0001, validation_fraction=0.1, verbose=False,
                                 warm_start=False)
        return 0  # fallback for an unrecognized algo name
Source: test_boosted_trees_classifier_numeric.py (project: coremltools, author: apple)
def _train_convert_evaluate(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state = 1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        return metrics
Source: test_boosted_trees_classifier.py (project: coremltools, author: apple)
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Source: User_Interface.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def GradientBoostingDecisionTree_Export(action):

        # Setting our classifier to Gradient Boosting
        clf = GradientBoostingClassifier()

        data_dir = input('Give Data Directory: ')

        if int(action) == 1:
                print('Loading Data')
                PopularityClassifier.loadData(data_dir)
                PopularityClassifier.youtubePopular(data_dir,clf,2)
                PopularityClassifier.twitterPopular(data_dir,clf,2)
                PopularityClassifier.bothPopular(data_dir,clf,2)
        elif int(action) == 2:
                print('Loading Data')
                ViralityClassifier.loadData(data_dir)
                ViralityClassifier.youtubeViral(data_dir,clf,2)
                ViralityClassifier.twitterViral(data_dir,clf,2)
                ViralityClassifier.bothViral(data_dir,clf,2)
        else:
                print('Loading Data')
                ViralityAndPopularityClassifier.loadData(data_dir)
                ViralityAndPopularityClassifier.youtubeViralAndPopular(data_dir,clf,2)
                ViralityAndPopularityClassifier.twitterViralAndPopular(data_dir,clf,2)
                ViralityAndPopularityClassifier.bothViralAndPopular(data_dir,clf,2)
Source: GradientBoostingClassifier.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def classify():

        # Predict popularity on the held-out videos
        gbdt = GradientBoostingClassifier()
        gbdt.fit(X, YP)
        print(valVir.shape)
        prediction = gbdt.predict(videos)
        print(prediction)

        same = 0
        for i in range(0, valPop.size):
                if valPop[i] == prediction[i]:
                        same = same + 1

        accuracy = same / valPop.size * 100
        print(accuracy)
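The manual counting loop above is equivalent to scikit-learn's built-in metric; a sketch using the same valPop and prediction arrays:

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(valPop, prediction) * 100  # fraction of matching labels, as a percentage
print(accuracy)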
Source: classify_user_item.py (project: dut_tianchi_mobile_recommend_train, author: ningshixian)
def classify_user_item(train_data_new, test_data_new, result9):
    data = np.loadtxt(train_data_new)
    X = data[:, :-1]  # select columns 0 through end-1
    y = data[:, -1]  # select column end
    print X
    print y
    print 'start train'

    clf2 = RandomForestClassifier(n_estimators=100)
    # clf2=GradientBoostingClassifier()
    clf2.fit(X, y)
    # clf2 = LogisticRegression().fit(X, y)
    print clf2.classes_

    data1 = np.loadtxt(test_data_new)
    X_test = data1[:, :]
    print 'testing data is ok'
    result = clf2.predict_proba(X_test)
    print 'output result'
    print result

    f_result = open(result9, 'w')
    for i in range(0, len(result)):
        f_result.write(str(result[i]) + '\n')
Source: gbc.py (project: Quora-Kaggle, author: PPshrimpGo)
def GradientBoostingClassifier(X_train, y_train, X_test):
    from sklearn.ensemble import GradientBoostingClassifier
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    GBC = GradientBoostingClassifier(max_features = 'sqrt',
                                     n_estimators = 300,
                                     learning_rate = 0.02,
                                     max_depth = 8,
                                     subsample = 0.8)  # GradientBoostingClassifier has no n_jobs parameter
    GBC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

    y_pred_GBC = GBC.predict_proba(X_test)
    y_pred_GBC = pd.DataFrame(y_pred_GBC[:,1:2],columns=['GBC_predictions'])
    y_pred_GBC.to_csv('GBC_result_all.csv', index=False)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
Source: mlp.py (project: Quora-Kaggle, author: PPshrimpGo)
def GradientBoostingClassifier(X_train, y_train, X_test):
    from sklearn.ensemble import GradientBoostingClassifier
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    GBC = GradientBoostingClassifier(max_features = 'sqrt',
                                     n_estimators = 300,
                                     learning_rate = 0.02,
                                     max_depth = 8,
                                     subsample = 0.8)
    GBC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

    y_pred_GBC = GBC.predict_proba(X_test)
    y_pred_GBC = pd.DataFrame(y_pred_GBC[:,1:2],columns=['GBC_predictions'])
    y_pred_GBC.to_csv('GBC_result_1.csv', index=False)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
Source: main.py (project: talkbot, author: nimnull)
def on_startup(app):
    connector = aiohttp.TCPConnector(limit=5, use_dns_cache=True, loop=app.loop)
    session = aiohttp.ClientSession(connector=connector, raise_for_status=True)
    bot = TelegramBot(app['config'].token, session)
    image_model = fit_model(app['config'].sample_df)

    def config_injections(binder):
        # injection bindings
        binder.bind(Config, app['config'])
        binder.bind(TelegramBot, bot)
        binder.bind(GradientBoostingClassifier, image_model)
        binder.bind_to_constructor(AsyncIOMotorDatabase, init_database)


    try:
        inject.configure(config_injections)
    except inject.InjectorException:
        log.error("Injector already configured", exc_info=True)

    setup_logging(log)

    app.loop.create_task(bot.set_hook())
Source: 10.5 Gradient_Classifier.py (project: ML-note, author: JasonK93)
def test_GradientBoostingClassifier_num(*data):
    '''
    test the performance with different n_estimators
    :param data:    train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    nums=np.arange(1,100,step=2)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for num in nums:
        clf=ensemble.GradientBoostingClassifier(n_estimators=num)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(nums,training_scores,label="Training Score")
    ax.plot(nums,testing_scores,label="Testing Score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
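Refitting a new model for every n_estimators value costs O(n^2) total trees; staged_predict evaluates every intermediate ensemble from a single fit. A sketch of the same sweep, assuming the X_train/X_test/y_train/y_test arrays from the function above:

import numpy as np
from sklearn import ensemble

clf = ensemble.GradientBoostingClassifier(n_estimators=100)
clf.fit(X_train, y_train)
# staged_predict yields the predictions after 1, 2, ..., n_estimators trees.
testing_scores = [np.mean(y_pred == y_test) for y_pred in clf.staged_predict(X_test)]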
Source: 10.5 Gradient_Classifier.py (project: ML-note, author: JasonK93)
def test_GradientBoostingClassifier_maxdepth(*data):
    '''
    test the performance with different max_depth
    :param data:     train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    maxdepths=np.arange(1,20)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for maxdepth in maxdepths:
        clf=ensemble.GradientBoostingClassifier(max_depth=maxdepth,max_leaf_nodes=None)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(maxdepths,training_scores,label="Training Score")
    ax.plot(maxdepths,testing_scores,label="Testing Score")
    ax.set_xlabel("max_depth")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Source: 10.5 Gradient_Classifier.py (project: ML-note, author: JasonK93)
def test_GradientBoostingClassifier_learning(*data):
    '''
    test the performance with different learning rate
    :param data:     train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    learnings=np.linspace(0.01,1.0)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for learning in learnings:
        clf=ensemble.GradientBoostingClassifier(learning_rate=learning)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(learnings,training_scores,label="Training Score")
    ax.plot(learnings,testing_scores,label="Testing Score")
    ax.set_xlabel("learning_rate")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Source: 10.5 Gradient_Classifier.py (project: ML-note, author: JasonK93)
def test_GradientBoostingClassifier_subsample(*data):
    '''
    test the performance with different subsample
    :param data:    train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    subsamples=np.linspace(0.01,1.0)
    testing_scores=[]
    training_scores=[]
    for subsample in subsamples:
        clf=ensemble.GradientBoostingClassifier(subsample=subsample)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(subsamples,training_scores,label="Training Score")
    ax.plot(subsamples,testing_scores,label="Testing Score")
    ax.set_xlabel("subsample")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Source: test_export.py (project: Parallel-SGD, author: angadgill)
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def check_classification_toy(presort, loss):
    # Check classification on a toy dataset.
    clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                     random_state=1, presort=presort)

    assert_raises(ValueError, clf.predict, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf.estimators_))

    deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
    assert_true(np.any(deviance_decrease >= 0.0))

    leaves = clf.apply(X)
    assert_equal(leaves.shape, (6, 10, 1))
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_probability_log():
    # Predict probabilities.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_check_inputs_predict():
    # X has wrong shape
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(X, y)

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
    clf.fit(X, rng.rand(len(X)))

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_staged_functions_defensive():
    # test that staged_functions make defensive copies
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(10, 3))
    y = (4 * X[:, 0]).astype(np.int) + 1  # don't predict zeros
    for estimator in [GradientBoostingRegressor(),
                      GradientBoostingClassifier()]:
        estimator.fit(X, y)
        for func in ['predict', 'decision_function', 'predict_proba']:
            staged_func = getattr(estimator, "staged_" + func, None)
            if staged_func is None:
                # regressor has no staged_predict_proba
                continue
            with warnings.catch_warnings(record=True):
                staged_result = list(staged_func(X))
            staged_result[1][:] = 0
            assert_true(np.all(staged_result[0] != 0))
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_serialization():
    # Check model serialization.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL)
    clf = None
    clf = pickle.loads(serialized_clf)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_more_verbose_output():
    # Check verbose=2 does not cause error.
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=2)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # no OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 2) % (
        'Iter', 'Train Loss', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # 100 lines for n_estimators==100
    assert_equal(100, n_lines)
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_warm_start_oob():
    # Test if warm start OOB equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
        est = Cls(n_estimators=200, max_depth=1, subsample=0.5,
                  random_state=1)
        est.fit(X, y)

        est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5,
                     random_state=1, warm_start=True)
        est_ws.fit(X, y)
        est_ws.set_params(n_estimators=200)
        est_ws.fit(X, y)

        assert_array_almost_equal(est_ws.oob_improvement_[:100],
                                  est.oob_improvement_[:100])
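warm_start, exercised by this test, grows an already-fitted ensemble instead of refitting from scratch; a minimal sketch:

from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_hastie_10_2(n_samples=100, random_state=1)
est = GradientBoostingClassifier(n_estimators=100, warm_start=True, random_state=1)
est.fit(X, y)                      # fits trees 1..100
est.set_params(n_estimators=200)
est.fit(X, y)                      # adds trees 101..200, keeping the first 100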
Source: test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_probability_exponential():
    # Predict probabilities.
    clf = GradientBoostingClassifier(loss='exponential',
                                     n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))
    score = clf.decision_function(T).ravel()
    assert_array_almost_equal(y_proba[:, 1],
                              1.0 / (1.0 + np.exp(-2 * score)))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Source: test_partial_dependence.py (project: Parallel-SGD, author: angadgill)
def test_partial_dependence_classifier():
    # Test partial dependence for classifier
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)

    pdp, axes = partial_dependence(clf, [0], X=X, grid_resolution=5)

    # only 4 grid points instead of 5 because only 4 unique X[:,0] vals
    assert pdp.shape == (1, 4)
    assert axes[0].shape[0] == 4

    # now with our own grid
    X_ = np.asarray(X)
    grid = np.unique(X_[:, 0])
    pdp_2, axes = partial_dependence(clf, [0], grid=grid)

    assert axes is None
    assert_array_equal(pdp, pdp_2)
Source: models.py (project: AutoML5, author: djajetic)
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # Always predict probabilities
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
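The eval call above builds the estimator from its name string; a plain dictionary of constructors gives the same dispatch without eval. A sketch, reusing self.name and verbose from the code above:

from sklearn.ensemble import GradientBoostingClassifier

constructors = {"GradientBoostingClassifier": GradientBoostingClassifier}
self.model = constructors[self.name](n_estimators=1, verbose=verbose,
                                     random_state=1, warm_start=True)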
Source: gb.py (project: hyperband, author: zygmuntz)
def try_params( n_iterations, params ):

    n_estimators = int( round( n_iterations * trees_per_iteration ))
    print "n_estimators:", n_estimators
    pprint( params )

    clf = GB( n_estimators = n_estimators, verbose = 0, **params )

    return train_and_eval_sklearn_classifier( clf, data )

