Example source code of the Python class XGBRegressor()
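The snippets below are collected from open-source projects that create instances of xgboost.XGBRegressor. For orientation, here is a minimal, self-contained sketch of the fit/predict workflow they all share; the dataset and hyperparameter values are illustrative assumptions, not taken from any of the listed projects.

import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# A toy regression dataset stands in for the project-specific feature matrices below.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a gradient-boosted tree regressor and evaluate it on the held-out split.
reg = xgb.XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.05)
reg.fit(X_train, y_train)
print('test MSE:', mean_squared_error(y_test, reg.predict(X_test)))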

resnet_regressor.py (project: Brain_Tumor_Segmentation, author: KarthikRevanuru)
def train_xgboost():
    df = pd.read_csv('survival_data.csv', index_col=0, encoding = 'UTF-7')
    p = np.array([np.mean(np.load('training/%s_flair.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    q = np.array([np.mean(np.load('training/%s_t1.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    r = np.array([np.mean(np.load('training/%s_t1ce.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])
    s = np.array([np.mean(np.load('training/%s_t2.nii.gz.npy' % str(id)), axis=0) for id in folder_names_train])

    y=np.array([])
    t=0
    z=np.array([])
    for ind in range(len(folder_names_train)):
        try:
            temp = df.get_value(str(folder_names_train[ind]),'Survival')
            y=np.append(y,temp)
            temp = df.get_value(str(folder_names_train[ind]),'Age')
            z=np.append(z,np.array([temp]))
        except Exception as e:
            t+=1 
            print (t,str(e),"Label Not found, deleting entry")
            y=np.append(y,0)

    z=np.array([[v] for v in z])

    t=np.concatenate((p,q),axis=1)
    u=np.concatenate((r,s),axis=1)
    x=np.concatenate((t,u),axis=1) 
    #print(x.shape)
    #print (x)
    #print (x.shape,z.shape)
    x=np.concatenate((x,z),axis=1)
    #print (x)
    #clf=linear_model.LogisticRegression(C=1e5)
    #clf = RandomForestRegressor()
    clf = xgb.XGBRegressor()
    clf.fit(x,y)
    return clf
test_boosted_trees_regression_numeric.py (project: coremltools, author: apple)
def _train_convert_evaluate(self, bt_params = {}, **params):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        # Train a model
        xgb_model = xgboost.XGBRegressor(**params)
        xgb_model.fit(self.X, self.target)

        # Convert the model (feature_names can't be given because of XGBoost)
        spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name, force_32bit_float = False)

        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = xgb_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target = 'target', verbose = False)
        return metrics
model_select.py (project: tpai_comp, author: luuuyi)
def xgb_model_select(file_name):  
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]

    print 'Select Model...'
    start_time  = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor() 
    parameters = {'n_estimators': [120, 100, 140], 'max_depth':[3,5,7,9]}
    grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters=grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
XGB_solver.py (project: tpai_comp, author: luuuyi)
def train_model_for_appcounts(df):
    app_df = df[['appCount','age','gender','education','marriageStatus','haveBaby']]
    known_app = app_df[app_df.appCount.notnull()].as_matrix()
    unknown_app = app_df[app_df.appCount.isnull()].as_matrix()
    y = known_app[:, 0]
    X = known_app[:, 1:]

    print 'Train Xgboost Model(For Missing AppCount)...'
    start_time  = datetime.datetime.now()
    xgb_reg = XGBRegressor(n_estimators=100, max_depth=3)
    xgb_reg.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: %d' % ((end_time - start_time).seconds)

    predicted_app = xgb_reg.predict(unknown_app[:, 1:])
    df.loc[ (df.appCount.isnull()), 'appCount' ] = predicted_app 

    return df, xgb_reg
XGB_solver.py (project: tpai_comp, author: luuuyi)
def train_model_for_age(df):
    age_df = df[['age', 'appCount','gender','education','marriageStatus','haveBaby']]
    known_age = age_df[age_df.age != 0].as_matrix()
    unknown_age = age_df[age_df.age == 0].as_matrix()
    y = known_age[:, 0]
    X = known_age[:, 1:]

    print 'Train Xgboost Model(For Missing Age)...'
    start_time  = datetime.datetime.now()
    xgb_reg = XGBRegressor(n_estimators=100, max_depth=3)
    xgb_reg.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: %d' % ((end_time - start_time).seconds)

    predicted_age = xgb_reg.predict(unknown_age[:, 1:])
    df.loc[ (df.age == 0), 'age' ] = predicted_age 

    return df, xgb_reg
XGB_solver.py (project: tpai_comp, author: luuuyi)
def generate_XGB_model(train_df):
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print 'Train And Fix Missing App Count Value...'
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    '''print 'Train And Fix Missing Age Value...'
    train_df, xgb_age = train_model_for_age(train_df)
    joblib.dump(xgb_age, 'XGB_age.model')'''
    train_df.drop(['marriageStatus','haveBaby','sitesetID', 'positionType'], axis=1, inplace=True)
    print 'Done'
    print train_df.info()
    print train_df.describe()
    print train_df.isnull().sum()
    train_np = train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Train Xgboost Model...'
    start_time  = datetime.datetime.now()
    xbg_clf = XGBRegressor(n_estimators=100, max_depth=6, objective="binary:logistic", silent=False)
    xbg_clf.fit(X,y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: %d' % ((end_time - start_time).seconds)
    model_df = pd.DataFrame({'columns':list(train_df.columns)[1:], 'values':xbg_clf.feature_importances_})
    print model_df
    return xbg_clf
script_resnet.py (project: huaat_ml_dl, author: ieee820)
def train_xgboost():
    df = pd.read_csv('data/stage1_labels.csv')
    print(df.head())

    x = np.array([np.mean(np.load('npy_result/%s.npy' % str(id)), axis=0) for id in df['id'].tolist()])
    y = df['cancer'].as_matrix()

    trn_x, val_x, trn_y, val_y = cross_validation.train_test_split(x, y, random_state=42, stratify=y,
                                                                   test_size=0.20)

    clf = xgb.XGBRegressor(max_depth=10,
                           n_estimators=1500,
                           min_child_weight=9,
                           learning_rate=0.05,
                           nthread=8,
                           subsample=0.80,
                           colsample_bytree=0.80,
                           seed=4242)

    clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=True, eval_metric='logloss', early_stopping_rounds=50)
    return clf
xgb.py (project: hyperband, author: zygmuntz)
def try_params( n_iterations, params, get_predictions = False ):

    n_estimators = int( round( n_iterations * trees_per_iteration ))
    print "n_estimators:", n_estimators
    pprint( params )

    model = XGB( n_estimators = n_estimators, nthread = -1, **params )

    return train_and_eval_sklearn_regressor( model, data )
predictor.py (project: auto_ml, author: doordash)
def print_results(self, model_name):
        if self.ml_for_analytics and model_name in ('LogisticRegression', 'RidgeClassifier', 'LinearRegression', 'Ridge'):
            self._print_ml_analytics_results_linear_model()

        elif self.ml_for_analytics and model_name in ['RandomForestClassifier', 'RandomForestRegressor', 'XGBClassifier', 'XGBRegressor', 'GradientBoostingRegressor', 'GradientBoostingClassifier', 'LGBMRegressor', 'LGBMClassifier']:
            self._print_ml_analytics_results_random_forest()
predictor.py (project: auto_ml, author: doordash)
def _get_xgb_feat_importances(self, clf):

        try:
            # Handles case when clf has been created by calling
            # xgb.XGBClassifier.fit() or xgb.XGBRegressor().fit()
            fscore = clf.booster().get_fscore()
        except:
            # Handles case when clf has been created by calling xgb.train.
            # Thus, clf is an instance of xgb.Booster.
            fscore = clf.get_fscore()

        trained_feature_names = self._get_trained_feature_names()

        feat_importances = []

        # Somewhat annoying. XGBoost only returns importances for the features it finds useful.
        # So we have to go in, get the index of the feature from the "feature name" by removing the f before the feature name, and grabbing the rest of that string, which is actually the index of that feature name.
        fscore_list = [[int(k[1:]), v] for k, v in fscore.items()]


        feature_infos = []
        sum_of_all_feature_importances = 0.0

        for idx_and_result in fscore_list:
            idx = idx_and_result[0]
            # Use the index that we grabbed above to find the human-readable feature name
            feature_name = trained_feature_names[idx]
            feat_importance = idx_and_result[1]

            # If we sum up all the feature importances and then divide by that sum, we will be able to have each feature importance as its relative feature importance, and the sum of all of them will sum up to 1, just as it is in scikit-learn.
            sum_of_all_feature_importances += feat_importance
            feature_infos.append([feature_name, feat_importance])

        sorted_feature_infos = sorted(feature_infos, key=lambda x: x[1])

        print('Here are the feature_importances from the tree-based model:')
        print('The printed list will only contain at most the top 50 features.')
        for feature in sorted_feature_infos[-50:]:
            print(str(feature[0]) + ': ' + str(round(feature[1] / sum_of_all_feature_importances, 4)))
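Under the scheme above, a raw fscore dict such as {'f0': 30, 'f2': 10} (a hypothetical example, not output from the project) maps to trained feature indices 0 and 2, and after dividing by the sum the reported relative importances are 0.75 and 0.25.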
predictor.py (project: auto_ml, author: doordash)
def _print_ml_analytics_results_random_forest(self):
        try:
            final_model_obj = self.trained_final_model.named_steps['final_model']
        except:
            final_model_obj = self.trained_final_model

        print('\n\nHere are the results from our ' + final_model_obj.model_name)
        if self.name is not None:
            print(self.name)
        print('predicting ' + self.output_column)

        # XGB's Classifier has a proper .feature_importances_ property, while the XGBRegressor does not.
        if final_model_obj.model_name in ['XGBRegressor', 'XGBClassifier']:
            self._get_xgb_feat_importances(final_model_obj.model)

        else:
            trained_feature_names = self._get_trained_feature_names()

            try:
                trained_feature_importances = final_model_obj.model.feature_importances_
            except AttributeError as e:
                # There was a version of LightGBM that had this misnamed to miss the "s" at the end
                trained_feature_importances = final_model_obj.model.feature_importance_

            feature_infos = zip(trained_feature_names, trained_feature_importances)

            sorted_feature_infos = sorted(feature_infos, key=lambda x: x[1])

            print('Here are the feature_importances from the tree-based model:')
            print('The printed list will only contain at most the top 50 features.')
            for feature in sorted_feature_infos[-50:]:
                print(feature[0] + ': ' + str(round(feature[1], 4)))
xgboost_sklearnmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
        self.clf = XGBRegressor(max_depth=7, learning_rate=0.01, n_estimators=100)

        return
train_tensorflow.py (project: tianchi_power, author: lvniqi)
def create_features(user_id,is_exp,
                         feature_cloumn_func = lambda day:get_feature_cloumn(None,day,has_user_type=False),
                         load_exp_func = load_user_exp_model,
                         load_func = load_user_model,
                         is_exp_power = False
                        ):
    print user_id
    dataset = get_month_by_id(user_id)
    result = []
    for day in range(1,32):
        feature_column = feature_cloumn_func(day)
        x_ = dataset[feature_column]
        trainer = xgb.XGBRegressor()
        if is_exp:
            if is_exp_power:
                x_ = exp_power(x_)
            load_exp_func(trainer,day,user_id)
        else:
            load_func(trainer,day,user_id)
        y_p = trainer.predict(x_)
        y_p = pd.Series(y_p,name='y_p#%d'%(day-1))
        if not is_exp:
            y_p = np.exp(y_p)
        result.append(y_p)
    result = pd.DataFrame(result).T
    result.index = dataset.index
    for day in range(31):
        result['real#%d'%day] = dataset['y#%d'%day].apply(np.exp)
    sys.stdout.flush()
    return result
scikit_regression_learners.py (project: MENGEL, author: CodeSpaceHQ)
def train_xgboost_regressor():
    return mp.ModelProperties(regression=True), xgboost.XGBRegressor()
filler_regression.py (project: MENGEL, author: CodeSpaceHQ)
def apply_filler(self, x_train, y_train, x_test):
        model = xgboost.XGBRegressor()
        model = model.fit(x_train, y_train)
        return model.predict(x_test)
test_boosted_trees_regression.py (project: coremltools, author: apple)
def test_unsupported_conversion(self):

        feature_names = self.scikit_data.feature_names
        output_name = 'target'
        xgb_model = xgboost.XGBRegressor(objective = 'reg:gamma')
        xgb_model.fit(self.scikit_data.data, self.scikit_data.target)
        with self.assertRaises(ValueError):
            spec = xgb_converter.convert(xgb_model, feature_names, 'target')

        xgb_model = xgboost.XGBRegressor(objective = 'reg:tweedie')
        xgb_model.fit(self.scikit_data.data, self.scikit_data.target)
        with self.assertRaises(ValueError):
            spec = xgb_converter.convert(xgb_model, feature_names, 'target')
XGB_solver.py (project: tpai_comp, author: luuuyi)
def test():
    iris = load_iris()  
    xgb_model = xgb.XGBRegressor(n_estimators=300000, max_depth=2)
    xgb_model.fit(iris.data[:120],iris.target[:120])

    predict = xgb_model.predict(iris.data[:120])
    print mean_squared_error(iris.target[:120], predict)

    pred = xgb_model.predict(iris.data[120:])
    print mean_squared_error(iris.target[120:], pred)
__init__.py (project: mlprojects-py, author: srinathperera)
def regression_with_xgboost(x_train, y_train, X_test, Y_test, features=None, use_cv=True, use_sklean=False, xgb_params=None):
    train_data = xgb.DMatrix(x_train, label=y_train, missing=float('nan'))
    test_data = xgb.DMatrix(X_test, Y_test, missing=float('nan'))
    evallist  = [(test_data,'eval'), (train_data,'train')]

    #if xgb_params == None:
    #    xgb_params = get_default_xgboost_params()

    if not use_cv:
        num_rounds = 10
    else:
        cvresult = xgb.cv(xgb_params, train_data, num_boost_round=100, nfold=5,
            metrics={'rmse'}, show_progress=True)
        print cvresult
        num_rounds = len(cvresult)
    gbdt = None
    if(use_sklean):
        #gbdt = xgboost.XGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective='reg:linear', nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, seed=0, missing=None)
        xgb_params['n_estimators'] = num_rounds
        gbdt = xgboost.XGBRegressor(xgb_params)

        gbdt.fit(x_train, y_train)
        y_pred = gbdt.predict(X_test)

        return gbdt, y_pred
    else:
        #gbdt = xgb.train( xgb_params, train_data, num_rounds, evallist, verbose_eval = True, early_stopping_rounds=5)
        gbdt = xgb.train( xgb_params, train_data, num_rounds, evallist, verbose_eval = True)

        ceate_feature_map_for_feature_importance(features)
        show_feature_importance(gbdt, feature_names=features)

        y_pred = gbdt.predict(xgb.DMatrix(X_test, missing=float("nan")))
        return XGBoostModel(gbdt), y_pred
main.py (project: Kaggle-DS-Bowl-17, author: Zephyr-D)
def train_xgboost():
    df = pd.read_csv('data/stage1_labels.csv')
#    print df.head()

    x = []
    y = []
    did = df['id'].tolist()
    cancer = df['cancer'].tolist()
    for i in range(len(df)):
        if os.path.isfile('data/stage1/%s.npy' % did[i]):
            f = np.load('data/stage1/%s.npy' % did[i])
            f = f.reshape(f.shape[0], 2048)
            x.append(np.mean(f, axis=0))
            y.append(cancer[i])

    x = np.array(x)
    print x.shape
    y = np.array(y)

    trn_x, val_x, trn_y, val_y = cross_validation.train_test_split(x, y, random_state=822, stratify=y, test_size=0.1)

    clfs = []
    for s in range(5):
    # Some parameters were taken from discussion.
        clf = xgb.XGBRegressor(n_estimators=1000, max_depth=10, min_child_weight=10,
                               learning_rate=0.01, subsample=0.80, colsample_bytree=0.70,
                               seed=822 + s, reg_alpha=0.1)

        clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=True, eval_metric='logloss', early_stopping_rounds=100)
        clfs.append(clf)
    return clfs
comparision-tree-based-methods.py (project: groot, author: zhpmatrix)
def xgbr(X,y):
    X_train,X_validation,y_train,y_validation = train_test_split(X,y,random_state=0)
    xgbr_boost = xgb.XGBRegressor(seed=1)
    xgbr_boost.fit(X_train,y_train.ravel())
    print 'training error:',1.0 - xgbr_boost.score(X_train,y_train)
    print 'validation error:',1.0 - xgbr_boost.score(X_validation,y_validation)
    time_fit(xgbr_boost,X_train,y_train.ravel())
onegbm.py (project: mars_express, author: wsteitz)
def __init__(self):
        self.name = "onegbm"
        self.m = Pipeline([
        ("drop", FeatureRemover(["UPBS", "UPBE", "SCMN", "earthmars_km", "OCC_MARS_200KM_START_", "sa_monthly"])),
        ("gbm", xgboost.XGBRegressor(max_depth=7, n_estimators=1000, learning_rate=0.05, silent=1, seed=42))
        ])
blend.py (project: kaggle_bnp-paribas, author: ArdalanM)
def models():
    params = {'n_jobs':nthread,'random_state':seed,'class_weight':None}

    # extra = ensemble.ExtraTreesClassifier(n_estimators=1000,max_features='auto',criterion= 'entropy',min_samples_split= 2, max_depth= None, min_samples_leaf= 1, **params)
    # extra1 = ensemble.ExtraTreesClassifier(n_estimators=1000,max_features=60,criterion= 'gini',min_samples_split= 4, max_depth= 40, min_samples_leaf= 2, **params)

    # rf = ensemble.RandomForestClassifier(n_estimators=1000,max_features= 'auto',criterion= 'gini',min_samples_split= 2, max_depth= None, min_samples_leaf= 1, **params)
    # rf1 = ensemble.RandomForestClassifier(n_estimators=1000,max_features=60,criterion= 'entropy',min_samples_split= 4, max_depth= 40, min_samples_leaf= 2, **params)

    # xgb_binlog = XGBClassifier(objective="binary:logistic" ,max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
    # xgb_reglog = XGBClassifier(objective="reg:logistic", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
    # xgb_poi = XGBClassifier(objective="count:poisson", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)
    # xgb_reglin = XGBClassifier(objective="reg:linear", max_depth=10, learning_rate=0.01, n_estimators=5,nthread=nthread, seed=seed)

    rf_params = {'n_estimators':850,'max_features':60,'criterion':'entropy','min_samples_split': 4,'max_depth': 40, 'min_samples_leaf': 2, 'n_jobs': -1}

    clfs = [
        # (D1, XGBRegressor(objective="reg:linear", max_depth=6, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
        (D1, XGBClassifier(objective="binary:logistic" ,max_depth=6, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
        # (D1, XGBRegressor(objective="reg:linear", max_depth=5, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
        # (D1,XGBClassifier(objective="binary:logistic", max_depth=5, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
        # (D1, XGBRegressor(objective="reg:linear", max_depth=4, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),
        # (D1,XGBClassifier(objective="binary:logistic", max_depth=4, learning_rate=0.01, subsample=.8, n_estimators=2000,nthread=nthread, seed=seed)),

    ]
    for clf in clfs:
        yield clf
test_core.py (project: dask-xgboost, author: dask)
def test_regressor(loop):  # noqa
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            a = dxgb.XGBRegressor()
            X2 = da.from_array(X, 5)
            y2 = da.from_array(y, 5)
            a.fit(X2, y2)
            p1 = a.predict(X2)

    b = xgb.XGBRegressor()
    b.fit(X, y)
    assert_eq(p1, b.predict(X))
gb_trees.py (project: real_estate, author: cooperoelrichs)
def make_model(params):
        return xgb.XGBRegressor(**params)
skilearnAlgorithn.py (project: GZ_travelTime, author: zhilonglu)
def xgb_Fit(knownX,knownY,preX):
    xlf = xgb.XGBRegressor(max_depth=11,
                           learning_rate=0.01,
                           n_estimators=301,
                           silent=True,
                           objective=mape,
                           gamma=0,
                           min_child_weight=5,
                           max_delta_step=0,
                           subsample=0.8,
                           colsample_bytree=0.8,
                           colsample_bylevel=1,
                           reg_alpha=1e0,
                           reg_lambda=0,
                           scale_pos_weight=1,
                           seed=9,
                           missing=None)
    x_train, x_test, y_train, y_test = train_test_split(knownX, knownY, test_size=0.5, random_state=1)
    for i in range(y_train.shape[1]):
        xlf.fit(x_train, y_train[:, i].reshape(-1, 1), eval_metric=mape, verbose=False)
                # eval_set=[(x_test, y_test[:, i].reshape(-1, 1))], early_stopping_rounds=2)
        tempPre = xlf.predict(preX).reshape(-1, 1)
        if i == 0:
            Y_pre = tempPre
        else:
            Y_pre = np.c_[Y_pre, tempPre]
    Y_pre = Y_pre.reshape(-1, 1)
    return Y_pre

# grid search over the model
FeatureEngineering.py (project: GZ_travelTime, author: zhilonglu)
def xgb_Fit(knownX,knownY,preX):
    xlf = xgb.XGBRegressor(max_depth=7,#11
                           learning_rate=0.06,#0.01
                           n_estimators=1000,
                           silent=True,
                           objective=mapeobj,
                           gamma=0,
                           min_child_weight=5,
                           max_delta_step=0,
                           subsample=1,#0.8
                           colsample_bytree=0.8,
                           colsample_bylevel=1,
                           reg_alpha=1e0,
                           reg_lambda=0,
                           scale_pos_weight=1,
                           seed=1850,
                           missing=None)
    x_train, x_test, y_train, y_test = train_test_split(knownX, knownY, test_size=0.5, random_state=1)
    for i in range(y_train.shape[1]):
        xlf.fit(x_train, y_train[:, i].reshape(-1,1))
        # print('Training Error: {:.3f}'.format(1 - xlf.score(x_train,y_train[:,i].reshape(-1,1))))
        # print('Validation Error: {:.3f}'.format(1 - xlf.score(x_test,y_test[:,i].reshape(-1,1))))
        #predict value for output
        tempPre = xlf.predict(preX).reshape(-1, 1)
        if i == 0:
            Y_pre = tempPre
        else:
            Y_pre = np.c_[Y_pre, tempPre]
    Y_pre = Y_pre.reshape(-1, 1)
    return Y_pre

# sklearn
svm-prediction.py (project: svm-prediction, author: zhengze)
def svr_main(X, Y):
    X_train = X[:TRAIN_SIZE]
    Y_train = Y[:TRAIN_SIZE]
    X_test = X[TRAIN_SIZE:]
    Y_test = Y[TRAIN_SIZE:]

    clf = SVR(kernel='rbf', C=1e3, gamma=0.00001)
    #clf.fit(X_train,Y_train)
    #y_pred = clf.predict(X_test)
    #plt.plot(X_test, y_pred, linestyle='-', color='red') 

    #clf = GradientBoostingRegressor(n_estimators=100,max_depth=1)
    #clf = DecisionTreeRegressor(max_depth=25)
    #clf = ExtraTreesRegressor(n_estimators=2000,max_depth=14)
    #clf = xgb.XGBRegressor(n_estimators=2000,max_depth=25)
    #clf = RandomForestRegressor(n_estimators=1000,max_depth=26,n_jobs=7)
    predict_list = []
    for i in xrange(TEST_SIZE):
        X = [ [x] for x in xrange(i, TRAIN_SIZE+i)]
        clf.fit(X, Y[i:TRAIN_SIZE+i])
        y_pred = clf.predict([TRAIN_SIZE+1+i])
        predict_list.append(y_pred)

    print "mean_squared_error:%s"%mean_squared_error(Y_test, predict_list)
    print "sqrt of mean_squared_error:%s"%np.sqrt(mean_squared_error(Y_test, predict_list))
    origin_data = Y_test
    print "origin data:%s"%origin_data
    plt.plot([ x for x in xrange(TRAIN_SIZE+1, TRAIN_SIZE+TEST_SIZE+1)], predict_list, linestyle='-', color='red', label='prediction model')  
    plt.plot(X_test, Y_test, linestyle='-', color='blue', label='actual model') 
    plt.legend(loc=1, prop={'size': 12})
    plt.show()
xgradient_boosting.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, y, refit=False):
        import xgboost as xgb

        self.learning_rate = float(self.learning_rate)
        self.n_estimators = int(self.n_estimators)
        self.subsample = float(self.subsample)
        self.max_depth = int(self.max_depth)

        # (TODO) Gb used at most half of the features, here we use all
        self.colsample_bylevel = float(self.colsample_bylevel)

        self.colsample_bytree = float(self.colsample_bytree)
        self.gamma = float(self.gamma)
        self.min_child_weight = int(self.min_child_weight)
        self.max_delta_step = int(self.max_delta_step)
        self.reg_alpha = float(self.reg_alpha)
        self.reg_lambda = float(self.reg_lambda)
        self.nthread = int(self.nthread)
        self.base_score = float(self.base_score)
        self.scale_pos_weight = float(self.scale_pos_weight)

        self.objective = 'reg:linear'

        self.estimator = xgb.XGBRegressor(
                max_depth=self.max_depth,
                learning_rate=self.learning_rate,
                n_estimators=self.n_estimators,
                silent=self.silent,
                objective=self.objective,
                nthread=self.nthread,
                gamma=self.gamma,
                scale_pos_weight=self.scale_pos_weight,
                min_child_weight=self.min_child_weight,
                max_delta_step=self.max_delta_step,
                subsample=self.subsample,
                colsample_bytree=self.colsample_bytree,
                colsample_bylevel=self.colsample_bylevel,
                reg_alpha=self.reg_alpha,
                reg_lambda=self.reg_lambda,
                base_score=self.base_score,
                seed=self.seed
                )

        self.estimator.fit(X, y)

        return self
main_CV.py (project: kaggle_bnp-paribas, author: ArdalanM)
def models():

    extra_params_kaggle_cla = {'n_estimators':1200,'max_features':30,'criterion':'entropy',
                           'min_samples_leaf': 2, 'min_samples_split': 2,'max_depth': 30,
                           'min_samples_leaf': 2, 'n_jobs':nthread, 'random_state':seed}

    extra_params_kaggle_reg = {'n_estimators':1200,'max_features':30,'criterion':'mse',
                           'min_samples_leaf': 2, 'min_samples_split': 2,'max_depth': 30,
                           'min_samples_leaf': 2, 'n_jobs':nthread, 'random_state':seed}


    xgb_reg = {'objective':'reg:linear', 'max_depth': 11, 'learning_rate':0.01, 'subsample':.9,
           'n_estimators':10000, 'colsample_bytree':0.45, 'nthread':nthread, 'seed':seed}

    xgb_cla = {'objective':'binary:logistic', 'max_depth': 11, 'learning_rate':0.01, 'subsample':.9,
           'n_estimators':10000, 'colsample_bytree':0.45, 'nthread':nthread, 'seed':seed}


    #NN params
    nb_epoch = 3
    batch_size = 128
    esr = 402

    param1 = {
        'hidden_units': (256, 256),
        'activation': (advanced_activations.PReLU(),advanced_activations.PReLU(),core.activations.sigmoid),
        'dropout': (0., 0.), 'optimizer': RMSprop(), 'nb_epoch': nb_epoch,
    }
    param2 = {
        'hidden_units': (1024, 1024),
        'activation': (advanced_activations.PReLU(),advanced_activations.PReLU(),core.activations.sigmoid),
        'dropout': (0., 0.), 'optimizer': RMSprop(), 'nb_epoch': nb_epoch,
    }
    clfs = [
        (D2, XGBClassifier(**xgb_cla)),
        (D11, XGBClassifier(**xgb_cla)),

        (D2, XGBRegressor(**xgb_reg)),
        (D11, XGBRegressor(**xgb_reg)),

        (D2, ensemble.ExtraTreesClassifier(**extra_params_kaggle_cla)),
        (D11, ensemble.ExtraTreesClassifier(**extra_params_kaggle_cla)),

        (D2, ensemble.ExtraTreesRegressor(**extra_params_kaggle_reg)),
        (D11, ensemble.ExtraTreesRegressor(**extra_params_kaggle_reg)),

    # (D1, NN(input_dim=D1[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2, loss='binary_crossentropy', class_mode='binary', **param1)),
    # (D3, NN(input_dim=D3[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2,loss='binary_crossentropy', class_mode='binary', **param1)),
    # (D5, NN(input_dim=D5[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2,loss='binary_crossentropy', class_mode='binary', **param1)),
    #
    # (D1, NN(input_dim=D1[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2,loss='binary_crossentropy', class_mode='binary', **param2)),
    # (D3, NN(input_dim=D3[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2,loss='binary_crossentropy', class_mode='binary', **param2)),
    # (D5, NN(input_dim=D5[0].shape[1], output_dim=1, batch_size=batch_size, early_stopping_epoch=esr, verbose=2,loss='binary_crossentropy', class_mode='binary', **param2))

    ]
    for clf in clfs:
        yield clf
tuner.py (project: xgboost-tuner, author: cwerner87)
def tune_xgb_params_segment_by_grid(estimator_cls: Type[Union[xgb.XGBClassifier, xgb.XGBRegressor]],
                                    label: np.ndarray,
                                    metric_sklearn: str,
                                    n_jobs: int,
                                    param_grid: dict,
                                    params: dict,
                                    strat_folds: StratifiedKFold,
                                    train: np.ndarray,
                                    verbosity_level: int = 10) -> Tuple[dict, float]:
    """
    Grid search over a segment of XGBoost parameters.

    :param estimator_cls:
        The class type of the estimator to instantiate - either an XGBClassifier or an XGBRegressor.
    :param label:
        An array-like containing the labels of the classification or regression problem.
    :param metric_sklearn:
        The evaluation metric to be passed to scikit-learn's GridSearchCV - see
        http://scikit-learn.org/stable/modules/model_evaluation.html
        for the options this can take - e.g. 'neg_mean_squared_error' for RMSE.
    :param n_jobs:
        The number of jobs to run simultaneously.
    :param param_grid:
        A dictionary of the grid of parameters to be searched over - e.g. {'colsample_bytree': range(0.5, 0.9, 0.1)} to search
        values [0.5, 0.6, 0.7, 0.8].
    :param params:
        A dictionary of XGB parameters.
    :param strat_folds:
        A StratifiedKFold object to cross validate the parameters.
    :param train:
        An array-like containing the training input samples.
    :param verbosity_level:
        An optional parameter to control the verbosity of the grid searching - defaults to the most verbose option.
    :return:
        A dictionary of tuned parameters and a list of the parameters found at each step with their respective scores.
    """
    params_copy = clean_params_for_sk(params)

    grid = GridSearchCV(
        cv=strat_folds.split(train, label),
        estimator=estimator_cls(**params_copy),
        n_jobs=n_jobs,
        param_grid=param_grid,
        scoring=metric_sklearn,
        verbose=verbosity_level
    )
    grid.fit(train, label)
    best_score = grid.best_score_
    # Massage the score to be in line with what xgboost reports
    if metric_sklearn == 'neg_mean_squared_error':
        best_score = abs(best_score) ** 0.5
    elif metric_sklearn == 'neg_log_loss':
        best_score = abs(best_score)
    return {k: grid.best_params_[k] for k in param_grid.keys()}, best_score
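As a point of comparison, the segment-wise tuning above boils down to a scikit-learn GridSearchCV over an XGBoost estimator. The sketch below is a self-contained illustration of that idea under assumed settings (toy dataset, plain KFold because the toy target is continuous, and illustrative parameter values); it is not part of the xgboost-tuner project.

import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import GridSearchCV, KFold

X, y = load_diabetes(return_X_y=True)
folds = KFold(n_splits=3, shuffle=True, random_state=0)

grid = GridSearchCV(
    estimator=xgb.XGBRegressor(learning_rate=0.1, n_estimators=50),
    param_grid={'max_depth': [3, 5], 'min_child_weight': [1, 5]},
    scoring='neg_mean_squared_error',
    cv=folds,
    n_jobs=1,
)
grid.fit(X, y)
# Convert the negative MSE back to RMSE, mirroring the score massaging above.
best_rmse = abs(grid.best_score_) ** 0.5
print(grid.best_params_, best_rmse)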

