def tune_xgb_params_segment_by_grid(estimator_cls: Type[Union[xgb.XGBClassifier, xgb.XGBRegressor]],
                                    label: np.ndarray,
                                    metric_sklearn: str,
                                    n_jobs: int,
                                    param_grid: dict,
                                    params: dict,
                                    strat_folds: StratifiedKFold,
                                    train: np.ndarray,
                                    verbosity_level: int = 10) -> Tuple[dict, float]:
    """
    Grid search over a segment of XGBoost parameters.
    :param estimator_cls:
        The class type of the estimator to instantiate - either an XGBClassifier or an XGBRegressor.
    :param label:
        An array-like containing the labels of the classification or regression problem.
    :param metric_sklearn:
        The evaluation metric to be passed to scikit-learn's GridSearchCV - see
        http://scikit-learn.org/stable/modules/model_evaluation.html
        for the options this can take - e.g. 'neg_mean_squared_error' for RMSE.
    :param n_jobs:
        The number of jobs to run simultaneously.
    :param param_grid:
        A dictionary of the grid of parameters to be searched over - e.g.
        {'colsample_bytree': [0.5, 0.6, 0.7, 0.8]} to search values [0.5, 0.6, 0.7, 0.8].
    :param params:
        A dictionary of XGB parameters.
    :param strat_folds:
        A StratifiedKFold object to cross validate the parameters.
    :param train:
        An array-like containing the training input samples.
    :param verbosity_level:
        An optional parameter to control the verbosity of the grid searching - defaults to the most verbose option.
    :return:
        A tuple of (best parameter values for exactly the keys in param_grid, the best
        cross-validated score, massaged to match what xgboost itself would report).
    """
    # Strip params that sklearn's estimator wrapper does not accept before instantiating.
    params_copy = clean_params_for_sk(params)
    grid = GridSearchCV(
        cv=strat_folds.split(train, label),
        estimator=estimator_cls(**params_copy),
        n_jobs=n_jobs,
        param_grid=param_grid,
        scoring=metric_sklearn,
        verbose=verbosity_level
    )
    grid.fit(train, label)
    best_score = grid.best_score_
    # scikit-learn negates loss metrics so that "greater is better" holds uniformly;
    # undo that (and take the square root for MSE) so the score matches xgboost's reporting.
    if metric_sklearn == 'neg_mean_squared_error':
        best_score = abs(best_score) ** 0.5
    elif metric_sklearn == 'neg_log_loss':
        best_score = abs(best_score)
    # Report only the parameters that were actually searched in this segment.
    return {k: grid.best_params_[k] for k in param_grid}, best_score