def tune_xgb_params_randomized(estimator_cls,
                               label: np.ndarray,
                               metric_sklearn: str,
                               n_jobs: int,
                               params: dict,
                               strat_folds: StratifiedKFold,
                               train: np.ndarray,
                               n_iter: int = 20,
                               verbosity_level: int = 10,
                               **kwargs):
    """Tune XGBoost hyperparameters via scikit-learn's RandomizedSearchCV.

    :param estimator_cls:
        The class type of the estimator to instantiate - either an XGBClassifier or an XGBRegressor.
    :param label:
        An array-like containing the labels of the classification or regression problem.
    :param metric_sklearn:
        The evaluation metric to be passed to scikit-learn's RandomizedSearchCV - see
        http://scikit-learn.org/stable/modules/model_evaluation.html
        for the options this can take - e.g. 'neg_mean_squared_error' for RMSE.
    :param n_jobs:
        The number of jobs to run simultaneously.
    :param params:
        A dictionary of XGB parameters.
    :param strat_folds:
        A StratifiedKFold object to cross validate the parameters.
    :param train:
        An array-like containing the training input samples.
    :param n_iter:
        An optional parameter to control the number of parameter settings that are sampled.
    :param verbosity_level:
        An optional parameter to control the verbosity of the grid searching - defaults to the most verbose option.
    :param kwargs:
        Parameter distributions may be controlled through keyword arguments - e.g. to sample uniformly between 0.5 and 0.7 for
        colsample_bytree, supply colsample_bytree_loc=0.5 and colsample_bytree_scale=0.2.
    :return:
        A dictionary of tuned parameters and a list of the parameters found at each step with their respective scores.
    """
    params_copy = clean_params_for_sk(params)
    param_distributions = {
        'colsample_bytree': uniform(kwargs.get('colsample_bytree_loc', 0.2), kwargs.get('colsample_bytree_scale', 0.8)),
        'gamma': uniform(kwargs.get('gamma_loc', 0), kwargs.get('gamma_scale', 0.9)),
        'max_depth': sp_randint(kwargs.get('max_depth_low', 2), kwargs.get('max_depth_high', 11)),
        'min_child_weight': sp_randint(kwargs.get('min_child_weight_low', 1), kwargs.get('min_child_weight_high', 11)),
        'reg_alpha': halfnorm(kwargs.get('reg_alpha_loc', 0), kwargs.get('reg_alpha_scale', 5)),
        # BUG FIX: this previously read the reg_alpha_* kwargs (copy-paste
        # error), so reg_lambda could never be controlled independently.
        # Fall back to the reg_alpha_* values so callers relying on the old
        # coupled behavior are unaffected.
        'reg_lambda': halfnorm(kwargs.get('reg_lambda_loc', kwargs.get('reg_alpha_loc', 0)),
                               kwargs.get('reg_lambda_scale', kwargs.get('reg_alpha_scale', 5))),
        'subsample': uniform(kwargs.get('subsample_loc', 0.2), kwargs.get('subsample_scale', 0.8))
    }
    rand_search = RandomizedSearchCV(
        # Pass the splitter itself rather than the one-shot generator from
        # strat_folds.split(train, label): the splits produced are the same,
        # but a splitter can be re-iterated by scikit-learn if needed.
        cv=strat_folds,
        estimator=estimator_cls(**params_copy),
        n_iter=n_iter,
        n_jobs=n_jobs,
        param_distributions=param_distributions,
        scoring=metric_sklearn,
        verbose=verbosity_level
    )
    rand_search.fit(train, label)
    # Second element mirrors the (params, score) step-history shape used by
    # the grid-search variants of this API; randomized search yields one step.
    return rand_search.best_params_, [(rand_search.best_params_, rand_search.best_score_)]