def test_check_cv_return_types():
    """Check which splitter class ``cval.check_cv`` returns per target type.

    A missing target with ``classifier=False`` must give ``KFold``; binary
    and multiclass targets with ``classifier=True`` must give
    ``StratifiedKFold``; multilabel and multioutput targets must give
    ``KFold`` even though ``classifier=True``.
    """
    def _expect(splitter, expected_cls):
        assert_true(isinstance(splitter, expected_cls))

    # Nine samples: no target, binary target, multiclass target.
    features = np.ones((9, 2))
    _expect(cval.check_cv(3, features, classifier=False), cval.KFold)

    binary_target = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    _expect(cval.check_cv(3, features, binary_target, classifier=True),
            cval.StratifiedKFold)

    multiclass_target = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    _expect(cval.check_cv(3, features, multiclass_target, classifier=True),
            cval.StratifiedKFold)

    # Five samples: two-dimensional targets.
    features = np.ones((5, 2))
    multilabel_target = [[1, 0, 1], [1, 1, 0], [0, 0, 0], [0, 1, 1], [1, 0, 0]]
    _expect(cval.check_cv(3, features, multilabel_target, classifier=True),
            cval.KFold)

    multioutput_target = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    _expect(cval.check_cv(3, features, multioutput_target, classifier=True),
            cval.KFold)
python类check_cv()的实例源码
def _set_cv(cv, X, y, classifier):
    """Validate ``cv`` using the ``check_cv`` call form selected by ``SK18``.

    Returns either a `sklearn.cross_validation._PartitionIterator` or a
    `sklearn.model_selection.BaseCrossValidator`, depending on whether
    sklearn-0.17 or sklearn-0.18 is being used.

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. An int is converted into the
        appropriate cross-validator class.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The data being fit in the grid search. Only forwarded to
        ``check_cv`` when ``SK18`` is falsy.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier.

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    if SK18:
        # This call form does not take ``X``.
        return check_cv(cv, y, classifier)
    return check_cv(cv, X, y, classifier)
def our_check_cv(cv, X, y, classifier):
    """Resolve ``cv`` via ``base_check_cv`` and materialise its splits.

    Returns a ``(n_splits, folds)`` tuple, where ``folds`` is the list of
    items yielded by the splitter's ``split(X, y=y)``.
    """
    splitter = base_check_cv(cv, y, classifier)
    n_folds = splitter.n_splits
    folds = list(splitter.split(X, y=y))
    return n_folds, folds
def our_check_cv(cv, X, y, classifier):
    """Resolve ``cv`` via ``base_check_cv`` and materialise its folds.

    Returns a ``(n_folds, folds)`` tuple, where ``n_folds`` is ``len()`` of
    the object returned by ``base_check_cv`` and ``folds`` is the list of
    items obtained by iterating over it.
    """
    partitioner = base_check_cv(cv, X, y, classifier)
    n_folds = len(partitioner)
    folds = [fold for fold in partitioner]
    return n_folds, folds
def evaluate_estimator(datafile, estimator, task,
                       metric=None,
                       logger=None):
    """Fit ``estimator`` on the data stored in ``datafile`` and score it.

    The object loaded from ``datafile`` carries a ``'resampling'`` entry
    that selects the evaluation strategy:

    * ``'holdout'`` -- fit on ``X``/``y``, score on ``valid_X``/``valid_y``.
    * ``'cv'`` -- fit and score on every fold from
      ``cross_validation.check_cv``, average the fold scores, then refit the
      estimator on the whole of ``X``/``y``.

    Parameters
    ----------
    datafile
        Passed to ``joblib.load``.
    estimator
        Object exposing ``fit`` plus ``predict`` (regression tasks) or
        ``predict_proba`` (classification tasks).
    task
        Task identifier, looked up in ``REGRESSION_TASKS`` and
        ``CLASSIFICATION_TASKS``.
    metric : optional
        Metric forwarded to ``_calculate_score``. When ``None`` in ``'cv'``
        mode, the per-fold score is treated as a mapping and each entry is
        averaged separately.
    logger : optional
        If truthy, its ``debug`` method receives a summary message.

    Returns
    -------
    The holdout score, or the mean cross-validation score (a mapping of
    per-metric means when ``metric`` is ``None``).

    Raises
    ------
    ValueError
        If ``metric`` is truthy and not in ``METRIC``.
    NotImplementedError
        If the resampling strategy is neither ``'holdout'`` nor ``'cv'``, or
        the task is neither a regression nor a classification task.
    """
    if metric and metric not in METRIC:
        raise ValueError("Invalid metric")

    def scorer(estimator, X, y):
        # Regression uses plain predictions; classification uses probabilities.
        if task in REGRESSION_TASKS:
            predicted = estimator.predict(X)
        elif task in CLASSIFICATION_TASKS:
            predicted = estimator.predict_proba(X, batch_size=1000)
        else:
            raise NotImplementedError()
        return _calculate_score(y, predicted, task, metric)

    started = time.time()
    payload = joblib.load(datafile, 'r')
    strategy = payload['resampling']

    if strategy == 'holdout':
        train_X, train_y, valid_X, valid_y = (
            payload["X"], payload["y"], payload["valid_X"], payload["valid_y"])
        estimator.fit(train_X, train_y)
        score = scorer(estimator, valid_X, valid_y)
    elif strategy == 'cv':
        X = payload["X"]
        y = payload["y"]
        is_classification = task in CLASSIFICATION_TASKS
        folds = cross_validation.check_cv(None, X, y, classifier=is_classification)

        per_metric = metric is None
        if per_metric:
            score = defaultdict(list)
        else:
            score = []

        for train_idx, test_idx in folds:
            fold_X, held_X = X[train_idx], X[test_idx]
            fold_y, held_y = y[train_idx], y[test_idx]
            estimator.fit(fold_X, fold_y)
            fold_score = scorer(estimator, held_X, held_y)
            if per_metric:
                for name in fold_score:
                    score[name].append(fold_score[name])
            else:
                score.append(fold_score)

        # Collapse the per-fold values into their mean.
        if per_metric:
            for name in score:
                score[name] = np.mean(score[name])
        else:
            score = np.mean(score)

        # Leave the estimator fitted on the full data set.
        estimator.fit(X, y)
    else:
        raise NotImplementedError()

    finished = time.time()
    if logger:
        logger.debug("Evaluation done, score: %s | %s sec\n%s" % (score, finished - started, estimator))
    return score
modifiedGridSearchCV.py 文件源码
项目:CerebralCortex-2.0-legacy
作者: MD2Korg
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def fit(self, X, y):
    """Search ``self.param_grid`` and keep the best-scoring parameters.

    Every candidate from ``ParameterGrid(self.param_grid)`` is evaluated
    with ``cv_fit_and_score`` on a fresh clone of the estimator. The result
    whose first element is largest wins (on ties, the later candidate).
    Its first two elements are stored as ``best_score_`` and
    ``best_params_``. When ``self.refit`` is truthy, a clone configured with
    the best parameters is fitted on the full data and stored as
    ``best_estimator_``.

    Raises ``ValueError`` if ``y`` is given and its length differs from the
    number of samples in ``X``. Returns ``self``.
    """
    candidates = ParameterGrid(self.param_grid)

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None and len(y) != n_samples:
        raise ValueError('Target variable (y) has a different number '
                         'of samples (%i) than data (X: %i samples)'
                         % (len(y), n_samples))

    folds = check_cv(self.cv, X, y, classifier=is_classifier(self.estimator))

    if self.verbose > 0 and isinstance(candidates, Sized):
        n_candidates = len(candidates)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(len(folds), n_candidates,
                                 n_candidates * len(folds)))

    base_estimator = clone(self.estimator)
    runner = Parallel(n_jobs=self.n_jobs,
                      verbose=self.verbose,
                      pre_dispatch=self.pre_dispatch)
    tasks = (
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=folds)
        for parameters in candidates
    )
    out = runner(tasks)

    # Ascending sort on the score; the last entry is the best result.
    ranked = sorted(out, key=lambda result: result[0])
    best = ranked[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is None:
            best_estimator.fit(X, **self.fit_params)
        else:
            best_estimator.fit(X, y, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
modifiedRandomizedSearchCV.py 文件源码
项目:CerebralCortex-2.0-legacy
作者: MD2Korg
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def fit(self, X, y):
    """Search sampled parameter settings and keep the best-scoring one.

    ``self.n_iter`` candidates are drawn from ``self.param_distributions``
    via ``ParameterSampler``. Each is evaluated with ``cv_fit_and_score`` on
    a fresh clone of the estimator. The result whose first element (the
    score) is largest wins; on ties the later candidate is kept. Its first
    two elements are stored as ``best_score_`` and ``best_params_``. When
    ``self.refit`` is truthy, a clone configured with the best parameters is
    fitted on the full data and stored as ``best_estimator_``.

    Parameters
    ----------
    X : indexable
        Training data.
    y : indexable or None
        Target values; may be ``None``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``y`` is given and its length differs from the number of samples
        in ``X``.
    """
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None and len(y) != n_samples:
        raise ValueError('Target variable (y) has a different number '
                         'of samples (%i) than data (X: %i samples)'
                         % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(len(cv), n_candidates,
                                 n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=self.pre_dispatch
    )(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    # Rank on the score only. Sorting the raw result tuples would fall back
    # to comparing the parameter mappings whenever two scores are equal,
    # and dicts are not orderable on Python 3 (TypeError).
    best = sorted(out, key=lambda result: result[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self