def tune_xgb_params_segment_by_grid(estimator_cls: Type[Union[xgb.XGBClassifier, xgb.XGBRegressor]],
                                    label: np.ndarray,
                                    metric_sklearn: str,
                                    n_jobs: int,
                                    param_grid: dict,
                                    params: dict,
                                    strat_folds: StratifiedKFold,
                                    train: np.ndarray,
                                    verbosity_level: int = 10) -> Tuple[dict, float]:
    """
    Grid search over a segment of XGBoost parameters.
    :param estimator_cls:
        The class type of the estimator to instantiate - either an XGBClassifier or an XGBRegressor.
    :param label:
        An array-like containing the labels of the classification or regression problem.
    :param metric_sklearn:
        The evaluation metric to be passed to scikit-learn's GridSearchCV - see
        http://scikit-learn.org/stable/modules/model_evaluation.html
        for the options this can take - e.g. 'neg_mean_squared_error' for RMSE.
    :param n_jobs:
        The number of jobs to run simultaneously.
    :param param_grid:
        A dictionary of the grid of parameters to be searched over - e.g.
        {'colsample_bytree': [0.5, 0.6, 0.7, 0.8]} to search values [0.5, 0.6, 0.7, 0.8].
    :param params:
        A dictionary of XGB parameters.
    :param strat_folds:
        A StratifiedKFold object to cross validate the parameters.
    :param train:
        An array-like containing the training input samples.
    :param verbosity_level:
        An optional parameter to control the verbosity of the grid searching - defaults to the most verbose option.
    :return:
        A tuple of (best parameter values for exactly the keys in param_grid, the best
        cross-validated score, massaged to match what xgboost itself would report).
    """
    # Strip params that sklearn's estimator wrapper does not accept before instantiating.
    params_copy = clean_params_for_sk(params)
    grid = GridSearchCV(
        cv=strat_folds.split(train, label),
        estimator=estimator_cls(**params_copy),
        n_jobs=n_jobs,
        param_grid=param_grid,
        scoring=metric_sklearn,
        verbose=verbosity_level
    )
    grid.fit(train, label)
    best_score = grid.best_score_
    # scikit-learn negates loss metrics so that "greater is better" holds uniformly;
    # undo that (and take the square root for MSE) so the score matches xgboost's reporting.
    if metric_sklearn == 'neg_mean_squared_error':
        best_score = abs(best_score) ** 0.5
    elif metric_sklearn == 'neg_log_loss':
        best_score = abs(best_score)
    # Report only the parameters that were actually searched in this segment.
    return {k: grid.best_params_[k] for k in param_grid}, best_score