def test_w_prep_fit():
"""[Model Selection] Test run with preprocessing, single step."""
evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
verbose=True)
with open(os.devnull, 'w') as f, redirect_stdout(f):
evl.fit(X, y,
estimators=[OLS()],
param_dicts={'ols': {'offset': randint(1, 10)}},
preprocessing={'pr': [Scale()], 'no': []},
n_iter=3)
np.testing.assert_approx_equal(
evl.results['test_score-m']['no.ols'],
-24.903229451043195)
np.testing.assert_approx_equal(
evl.results['test_score-m']['pr.ols'],
-26.510708862278072, 1)
assert evl.results['params']['no.ols']['offset'] == 4
assert evl.results['params']['pr.ols']['offset'] == 4
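# The tests in this module pass randint(1, 10) as the distribution for the
# 'offset' parameter. A minimal sketch of the object being passed, assuming
# randint here is scipy.stats.randint (a frozen discrete-uniform distribution):
def _randint_distribution_sketch():
    """Hedged usage sketch; not part of the original test suite."""
    from scipy.stats import randint

    dist = randint(1, 10)                       # integers 1..9 inclusive
    samples = dist.rvs(size=5, random_state=0)  # what the searcher draws internally
    assert all(1 <= s < 10 for s in samples)
    return samples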
def get_uniform_paramgrid(hyperparameters, fixed_parameters):
param_grid = dict()
for param_name, hyperparameter in hyperparameters.items():
if fixed_parameters is not None and param_name in fixed_parameters.keys():
continue
if isinstance(hyperparameter, CategoricalHyperparameter):
all_values = hyperparameter.choices
            if all(item in ['True', 'False'] for item in all_values):
                # bool('False') evaluates to True, so map the strings explicitly
                all_values = [item == 'True' for item in all_values]
param_grid[param_name] = all_values
elif isinstance(hyperparameter, UniformFloatHyperparameter):
if hyperparameter.log:
param_grid[param_name] = loguniform(base=2, low=hyperparameter.lower, high=hyperparameter.upper)
else:
param_grid[param_name] = uniform(loc=hyperparameter.lower, scale=hyperparameter.upper-hyperparameter.lower)
elif isinstance(hyperparameter, UniformIntegerHyperparameter):
if hyperparameter.log:
param_grid[param_name] = loguniform_int(base=2, low=hyperparameter.lower, high=hyperparameter.upper)
else:
param_grid[param_name] = randint(low=hyperparameter.lower, high=hyperparameter.upper+1)
else:
            raise ValueError('Unsupported hyperparameter type: %s' % type(hyperparameter))
return param_grid
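# Hedged usage sketch for get_uniform_paramgrid: build a few ConfigSpace
# hyperparameters, convert them to list/scipy distributions, and hand the result
# to RandomizedSearchCV. The hyperparameter names and values below are
# illustrative assumptions; log-scaled hyperparameters are avoided because
# loguniform/loguniform_int above are helpers from the surrounding module.
def _paramgrid_usage_sketch():
    """Hedged usage sketch; not part of the original module."""
    from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                             UniformFloatHyperparameter,
                                             UniformIntegerHyperparameter)
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import RandomizedSearchCV

    hyperparameters = {
        'max_depth': UniformIntegerHyperparameter('max_depth', 2, 10),
        'max_features': UniformFloatHyperparameter('max_features', 0.1, 0.9),
        'bootstrap': CategoricalHyperparameter('bootstrap', ['True', 'False']),
    }
    param_grid = get_uniform_paramgrid(hyperparameters, fixed_parameters=None)
    return RandomizedSearchCV(RandomForestClassifier(), param_grid, n_iter=5)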
def test_large_grid():
"""In this test, we purposely overfit a RandomForest to completely random data
    in order to assert that the test error will far exceed the train error.
"""
if not SK18:
custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
else:
custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)
# define the pipe
pipe = Pipeline([
('scaler', SelectiveScaler()),
('pca', SelectivePCA(weight=True)),
('rf', RandomForestClassifier(random_state=42))
])
# define hyper parameters
hp = {
'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
'pca__whiten': [True, False],
'pca__weight': [True, False],
'pca__n_components': uniform(0.75, 0.15),
'rf__n_estimators': randint(5, 10),
'rf__max_depth': randint(5, 15)
}
# define the grid
grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)
# this will fail because we haven't fit yet
assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)
# fit the grid
grid.fit(X_train, y_train)
# score for coverage -- this might warn...
with warnings.catch_warnings():
warnings.simplefilter("ignore")
grid.score(X_train, y_train)
# coverage:
assert grid._estimator_type == 'classifier'
# get predictions
tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)
# evaluate score (SHOULD be better than random...)
accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)
# grid score reports:
# assert fails for bad percentile
assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})
# assert fails for bad y_axis
assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})
# assert passes otherwise
report_grid_score_detail(grid, charts=True, percentile=0.95) # just ensure percentile works
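# The hyperparameter dict above mixes plain lists (sampled by uniform choice)
# with scipy distributions such as uniform() and randint() (sampled via .rvs).
# A minimal sketch of that sampling mechanism using scikit-learn's
# ParameterSampler, independent of the skutil pipeline used in the test:
def _parameter_sampler_sketch():
    """Hedged sketch of RandomizedSearchCV-style sampling; not an original test."""
    from scipy.stats import randint, uniform
    from sklearn.model_selection import ParameterSampler

    hp = {
        'pca__whiten': [True, False],              # list -> uniform choice
        'pca__n_components': uniform(0.75, 0.15),  # distribution -> .rvs()
        'rf__n_estimators': randint(5, 10),
    }
    return list(ParameterSampler(hp, n_iter=3, random_state=42))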
def fit(self, X, Y):
""" Train classifier.
Parameters
----------
X : np.array [n_samples, n_features]
Training features.
Y : np.array [n_samples]
Training labels
"""
x_shuffle, y_shuffle = shuffle(X, Y, random_state=self.random_state)
clf_cv = RFC(n_estimators=self.n_estimators, n_jobs=self.n_jobs,
class_weight=self.class_weight,
random_state=self.random_state)
param_dist = {
"max_depth": sp_randint(1, 101),
"max_features": [None, 'auto', 'sqrt', 'log2'],
"min_samples_split": sp_randint(2, 11),
"min_samples_leaf": sp_randint(1, 11),
"bootstrap": [True, False],
"criterion": ["gini", "entropy"]
}
random_search = RandomizedSearchCV(
clf_cv, param_distributions=param_dist, refit=True,
n_iter=self.n_iter_search, scoring='f1_weighted',
random_state=self.random_state
)
random_search.fit(x_shuffle, y_shuffle)
self.clf = random_search.best_estimator_
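# A self-contained sketch of the search performed in fit() above: the same
# sp_randint-based parameter distributions applied to a RandomForest on
# synthetic data. The dataset and n_iter below are illustrative assumptions.
def _randomized_rf_search_sketch():
    """Hedged usage sketch mirroring the pattern in fit(); not the original class."""
    from scipy.stats import randint as sp_randint
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import RandomizedSearchCV

    X, y = make_classification(n_samples=200, n_features=10, random_state=0)
    param_dist = {
        'max_depth': sp_randint(1, 101),
        'min_samples_split': sp_randint(2, 11),
        'min_samples_leaf': sp_randint(1, 11),
        'bootstrap': [True, False],
    }
    search = RandomizedSearchCV(RandomForestClassifier(random_state=0),
                                param_distributions=param_dist,
                                n_iter=5, scoring='f1_weighted', random_state=0)
    search.fit(X, y)
    return search.best_estimator_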
def test_params():
"""[Model Selection] Test raises on bad params."""
evl = Evaluator(mape_scorer, verbose=2)
np.testing.assert_raises(ValueError,
evl.fit, X, y,
estimators=[OLS()],
param_dicts={'bad.ols':
{'offset': randint(1, 10)}},
preprocessing={'prep': [Scale()]})
def test_raises():
"""[Model Selection] Test raises on error."""
evl = Evaluator(bad_scorer, verbose=1)
with open(os.devnull, 'w') as f, redirect_stdout(f):
np.testing.assert_raises(
ValueError, evl.fit, X, y, estimators=[OLS()],
param_dicts={'ols': {'offset': randint(1, 10)}}, n_iter=1)
def test_passes():
"""[Model Selection] Test sets error score on failed scoring."""
evl = Evaluator(bad_scorer, error_score=0, n_jobs=1, verbose=5)
with open(os.devnull, 'w') as f, redirect_stdout(f):
evl = np.testing.assert_warns(FitFailedWarning,
evl.fit, X, y,
estimators=[OLS()],
param_dicts={'ols':
{'offset': randint(1, 10)}},
n_iter=1)
assert evl.results['test_score-m']['ols'] == 0
def test_no_prep():
"""[Model Selection] Test run without preprocessing."""
evl = Evaluator(mape_scorer, cv=5, shuffle=False,
random_state=100, verbose=12)
with open(os.devnull, 'w') as f, redirect_stdout(f):
evl.fit(X, y,
estimators=[OLS()],
param_dicts={'ols': {'offset': randint(1, 10)}},
n_iter=3)
np.testing.assert_approx_equal(
evl.results['test_score-m']['ols'],
-24.903229451043195)
assert evl.results['params']['ols']['offset'] == 4
def test_w_prep_set_params():
"""[Model Selection] Test run with preprocessing, sep param dists."""
evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
verbose=2)
params = {'no.ols': {'offset': randint(3, 6)},
'pr.ols': {'offset': randint(1, 3)},
}
with open(os.devnull, 'w') as f, redirect_stdout(f):
evl.fit(X, y,
estimators={'pr': [OLS()], 'no': [OLS()]},
param_dicts=params,
preprocessing={'pr': [Scale()], 'no': []},
n_iter=10)
np.testing.assert_approx_equal(
evl.results['test_score-m']['no.ols'],
-18.684229451043198)
np.testing.assert_approx_equal(
evl.results['test_score-m']['pr.ols'],
-7.2594502123869491)
assert evl.results['params']['no.ols']['offset'] == 3
assert evl.results['params']['pr.ols']['offset'] == 1
def test_random_grid():
# build a pipeline
pipe = Pipeline([
('retainer', FeatureRetainer()), # will retain all
('dropper', FeatureDropper()), # won't drop any
('mapper', FunctionMapper()), # pass through
('encoder', OneHotCategoricalEncoder()), # no object dtypes, so will pass through
('collinearity', MulticollinearityFilterer(threshold=0.85)),
('imputer', SelectiveImputer()), # pass through
('scaler', SelectiveScaler()),
('boxcox', BoxCoxTransformer()),
('nzv', NearZeroVarianceFilterer(threshold=1e-4)),
('pca', SelectivePCA(n_components=0.9)),
('model', RandomForestClassifier(n_jobs=1))
])
# let's define a set of hyper-parameters over which to search
hp = {
'collinearity__threshold': uniform(loc=.8, scale=.15),
'collinearity__method': ['pearson', 'kendall', 'spearman'],
'scaler__scaler': [StandardScaler(), RobustScaler()],
'pca__n_components': uniform(loc=.75, scale=.2),
'pca__whiten': [True, False],
'model__n_estimators': randint(5, 10),
'model__max_depth': randint(2, 5),
'model__min_samples_leaf': randint(1, 5),
'model__max_features': uniform(loc=.5, scale=.5),
'model__max_leaf_nodes': randint(10, 15)
}
# define the gridsearch
search = RandomizedSearchCV(pipe, hp,
n_iter=2, # just to test it even works
scoring='accuracy',
cv=2,
random_state=42)
# fit the search
search.fit(X_train, y_train)
# test the report
report_grid_score_detail(search, charts=False)
def tune_xgb_params_randomized(estimator_cls,
label: np.ndarray,
metric_sklearn: str,
n_jobs: int,
params: dict,
strat_folds: StratifiedKFold,
train: np.ndarray,
n_iter: int = 20,
verbosity_level: int = 10,
**kwargs):
"""
:param estimator_cls:
The class type of the estimator to instantiate - either an XGBClassifier or an XGBRegressor.
:param label:
An array-like containing the labels of the classification or regression problem.
    :param metric_sklearn:
        The evaluation metric to be passed to scikit-learn's RandomizedSearchCV - see
        http://scikit-learn.org/stable/modules/model_evaluation.html
        for the options this can take - e.g. 'neg_mean_squared_error' for (negative) mean squared error.
:param n_jobs:
The number of jobs to run simultaneously.
:param params:
A dictionary of XGB parameters.
:param strat_folds:
A StratifiedKFold object to cross validate the parameters.
:param train:
An array-like containing the training input samples.
:param n_iter:
An optional parameter to control the number of parameter settings that are sampled.
:param verbosity_level:
An optional parameter to control the verbosity of the grid searching - defaults to the most verbose option.
:param kwargs:
Parameter distributions may be controlled through keyword arguments - e.g. to sample uniformly between 0.5 and 0.7 for
colsample_bytree, supply colsample_bytree_loc=0.5 and colsample_bytree_scale=0.2.
:return:
A dictionary of tuned parameters and a list of the parameters found at each step with their respective scores.
"""
params_copy = clean_params_for_sk(params)
param_distributions = {
'colsample_bytree': uniform(kwargs.get('colsample_bytree_loc', 0.2), kwargs.get('colsample_bytree_scale', 0.8)),
'gamma': uniform(kwargs.get('gamma_loc', 0), kwargs.get('gamma_scale', 0.9)),
'max_depth': sp_randint(kwargs.get('max_depth_low', 2), kwargs.get('max_depth_high', 11)),
'min_child_weight': sp_randint(kwargs.get('min_child_weight_low', 1), kwargs.get('min_child_weight_high', 11)),
'reg_alpha': halfnorm(kwargs.get('reg_alpha_loc', 0), kwargs.get('reg_alpha_scale', 5)),
        'reg_lambda': halfnorm(kwargs.get('reg_lambda_loc', 0), kwargs.get('reg_lambda_scale', 5)),
'subsample': uniform(kwargs.get('subsample_loc', 0.2), kwargs.get('subsample_scale', 0.8))
}
rand_search = RandomizedSearchCV(
cv=strat_folds.split(train, label),
estimator=estimator_cls(**params_copy),
n_iter=n_iter,
n_jobs=n_jobs,
param_distributions=param_distributions,
scoring=metric_sklearn,
verbose=verbosity_level
)
rand_search.fit(train, label)
return rand_search.best_params_, [(rand_search.best_params_, rand_search.best_score_)]
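# Hedged usage sketch for tune_xgb_params_randomized: tune an XGBClassifier on
# synthetic data. The dataset, the starting params and n_iter below are
# illustrative assumptions; clean_params_for_sk is the module helper used above.
def _tune_xgb_sketch():
    """Hedged usage sketch; not part of the original module."""
    from sklearn.datasets import make_classification
    from sklearn.model_selection import StratifiedKFold
    from xgboost import XGBClassifier

    train, label = make_classification(n_samples=300, n_features=12, random_state=0)
    strat_folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)
    best_params, history = tune_xgb_params_randomized(
        XGBClassifier,
        label=label,
        metric_sklearn='neg_log_loss',
        n_jobs=1,
        params={'objective': 'binary:logistic'},
        strat_folds=strat_folds,
        train=train,
        n_iter=5)
    return best_params, history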