python类check_cv()的实例源码-面圈网

test_cross_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_check_cv_return_types():
    X = np.ones((9, 2))
    cv = cval.check_cv(3, X, classifier=False)
    assert_true(isinstance(cv, cval.KFold))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    cv = cval.check_cv(3, X, y_binary, classifier=True)
    assert_true(isinstance(cv, cval.StratifiedKFold))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv = cval.check_cv(3, X, y_multiclass, classifier=True)
    assert_true(isinstance(cv, cval.StratifiedKFold))

    X = np.ones((5, 2))
    y_multilabel = [[1, 0, 1], [1, 1, 0], [0, 0, 0], [0, 1, 1], [1, 0, 0]]
    cv = cval.check_cv(3, X, y_multilabel, classifier=True)
    assert_true(isinstance(cv, cval.KFold))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    cv = cval.check_cv(3, X, y_multioutput, classifier=True)
    assert_true(isinstance(cv, cval.KFold))

fixes.py 文件源码项目：skutil 作者: tgsmith61591 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def _set_cv(cv, X, y, classifier):
    """This method returns either a `sklearn.cross_validation._PartitionIterator` or 
    `sklearn.model_selection.BaseCrossValidator` depending on whether sklearn-0.17
    or sklearn-0.18 is being used.

    Parameters
    ----------

    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of crossvalidator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier

    Returns
    -------

    `_PartitionIterator` or `BaseCrossValidator`
    """
    return check_cv(cv, X, y, classifier) if not SK18 else check_cv(cv, y, classifier)

search.py 文件源码项目：sigopt_sklearn 作者: sigopt 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def our_check_cv(cv, X, y, classifier):
      ret = base_check_cv(cv, y, classifier)
      return ret.n_splits, list(ret.split(X, y=y))

search.py 文件源码项目：sigopt_sklearn 作者: sigopt 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def our_check_cv(cv, X, y, classifier):
      ret = base_check_cv(cv, X, y, classifier)
      return len(ret), list(iter(ret))

evaluation.py 文件源码项目：AutoML-Challenge 作者: postech-mlg-exbrain 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def evaluate_estimator(datafile, estimator, task,
                       metric=None,
                       logger=None):
    if metric and metric not in METRIC:
        raise ValueError("Invalid metric")

    def scorer(estimator, X, y):
        if task in REGRESSION_TASKS:
            y_pr = estimator.predict(X)
        elif task in CLASSIFICATION_TASKS:
            y_pr = estimator.predict_proba(X, batch_size=1000)
        else:
            raise NotImplementedError()
        score = _calculate_score(y, y_pr, task, metric)

        return score

    eval_s = time.time()

    data_pkl = joblib.load(datafile, 'r')
    resampling = data_pkl['resampling']
    if resampling == 'holdout':
        X_tr = data_pkl["X"]
        y_tr = data_pkl["y"]
        X_val = data_pkl["valid_X"]
        y_val = data_pkl["valid_y"]
        estimator.fit(X_tr, y_tr)
        score = scorer(estimator, X_val, y_val)
    elif resampling == 'cv':
        X, y = data_pkl["X"], data_pkl["y"]
        cv = cross_validation.check_cv(None, X, y, classifier=(task in CLASSIFICATION_TASKS))

        score = defaultdict(list) if metric is None else []
        for train, test in cv:
            X_tr, X_val = X[train], X[test]
            y_tr, y_val = y[train], y[test]
            estimator.fit(X_tr, y_tr)
            score_ = scorer(estimator, X_val, y_val)
            if metric is None:
                for m in score_:
                    score[m].append(score_[m])
            else:
                score.append(score_)
        if metric is None:
            for m in score:
                score[m] = np.mean(score[m])
        else:
            score = np.mean(score)
        estimator.fit(X, y)
    else:
        raise NotImplementedError()

    eval_e = time.time()
    if logger:
        logger.debug("Evaluation done, score: %s | %s sec\n%s" % (score, eval_e-eval_s, estimator))

    return score

modifiedGridSearchCV.py 文件源码项目：CerebralCortex-2.0-legacy 作者: MD2Korg 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def fit(self, X, y):
        """Actual fitting,  performing the search over parameters."""

        parameter_iterable = ParameterGrid(self.param_grid)

        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        best = sorted(out, key=lambda x: x[0])[-1]
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self

modifiedRandomizedSearchCV.py 文件源码项目：CerebralCortex-2.0-legacy 作者: MD2Korg 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def fit(self, X, y):
        """Actual fitting,  performing the search over parameters."""

        parameter_iterable = ParameterSampler(self.param_distributions,
                                              self.n_iter,
                                              random_state=self.random_state)
        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(
            delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        best = sorted(out, reverse=True)[0]
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self