def regression_with_xgboost(x_train, y_train, X_test, Y_test, features=None, use_cv=True, use_sklean=False, xgb_params=None):
    """Train an XGBoost regressor and predict on the test set.

    Args:
        x_train, y_train: training features and targets.
        X_test, Y_test: evaluation features and targets (Y_test is used as the
            label of the eval DMatrix so RMSE can be reported during training).
        features: optional feature-name list, forwarded to the feature-importance
            helpers in the native-API branch.
        use_cv: when True, choose the number of boosting rounds by 5-fold CV
            (up to 100 rounds); otherwise use a fixed 10 rounds.
        use_sklean: when True, train via the sklearn-style ``XGBRegressor``
            wrapper and return it directly; otherwise use the native
            ``xgb.train`` API and return a wrapped ``XGBoostModel``.
        xgb_params: dict of XGBoost parameters. Defaults to an empty dict
            (XGBoost's own defaults) when None.

    Returns:
        (model, y_pred): the trained model (``XGBRegressor`` or
        ``XGBoostModel``) and the predictions for ``X_test``.
    """
    # The original default-params lookup was commented out, which made the
    # default xgb_params=None crash inside xgb.cv/xgb.train. Fall back to an
    # empty dict so XGBoost applies its own defaults.
    if xgb_params is None:
        xgb_params = {}

    train_data = xgb.DMatrix(x_train, label=y_train, missing=float('nan'))
    test_data = xgb.DMatrix(X_test, Y_test, missing=float('nan'))
    evallist = [(test_data, 'eval'), (train_data, 'train')]

    if not use_cv:
        num_rounds = 10
    else:
        # Cross-validate to pick how many boosting rounds to train for:
        # xgb.cv returns one row per round, so its length is the round count.
        cvresult = xgb.cv(xgb_params, train_data, num_boost_round=100, nfold=5,
                          metrics={'rmse'}, show_progress=True)
        print(cvresult)
        num_rounds = len(cvresult)

    if use_sklean:
        # Copy so we don't mutate the caller's dict when injecting n_estimators.
        sk_params = dict(xgb_params)
        sk_params['n_estimators'] = num_rounds
        # BUG FIX: the params dict must be expanded as keyword arguments.
        # Passing it positionally bound the whole dict to max_depth and
        # silently ignored every configured parameter.
        gbdt = xgboost.XGBRegressor(**sk_params)
        gbdt.fit(x_train, y_train)
        y_pred = gbdt.predict(X_test)
        return gbdt, y_pred
    else:
        gbdt = xgb.train(xgb_params, train_data, num_rounds, evallist, verbose_eval=True)
        ceate_feature_map_for_feature_importance(features)
        show_feature_importance(gbdt, feature_names=features)
        y_pred = gbdt.predict(xgb.DMatrix(X_test, missing=float("nan")))
        return XGBoostModel(gbdt), y_pred
# (stray text from the page this snippet was scraped from; kept as comments
#  so the module parses: "评论列表" = comment list, "文章目录" = table of contents)