def cached_run(steps, X, y):
    """Cross-validate a sequence of ``steps`` on ``(X, y)`` and return the mean score.

    The data is split into folds with ``KFold`` (using the module-level
    ``_n_fold`` and ``_random_state``). Every step except the last is applied
    to the folded data through ``run_step_on_demand``; the last step is
    treated as the estimator and is fitted and scored once per fold.

    Args:
        steps: Sequence of pipeline steps. The final element must provide
            ``fit(X, y)`` and ``score(X, y)``.
        X: Indexable sample container; indexed with the train/test indices
            produced by ``KFold``.
        y: Indexable target container with the same length as ``X``
            (its ``len`` determines the number of samples to split).

    Returns:
        ``np.mean`` of the per-fold ``estimator.score`` results.
    """
    # Split the data: one (X_train, y_train, X_test, y_test) tuple per fold.
    sample_count = len(y)
    splitter = KFold(sample_count, _n_fold, random_state=_random_state)
    folded_data = []
    for train_index, test_index in splitter:
        folded_data.append(
            (X[train_index], y[train_index], X[test_index], y[test_index])
        )

    # The identifier grows by one "/<step>" segment per step, so each
    # intermediate result is keyed by the full chain of steps leading to it.
    # NOTE(review): presumably run_step_on_demand uses this key to reuse
    # previously computed results -- confirm against its implementation.
    step_identifier = ''
    for transform in steps[:-1]:
        step_identifier = step_identifier + "/%s" % _step_identifier(transform)
        logger.info("Processing %s", step_identifier)
        folded_data = run_step_on_demand(step_identifier, transform, folded_data)

    # The last step is the estimator and is handled separately.
    estimator = steps[-1]
    step_identifier = step_identifier + "/%s" % _step_identifier(estimator)

    def _fit_and_score(fold):
        # Refit the same estimator object on this fold, then score it on the
        # fold's held-out part.
        X_train, y_train, X_test, y_test = fold
        estimator.fit(X_train, y_train)
        return estimator.score(X_test, y_test)

    fold_scores = [_fit_and_score(fold) for fold in folded_data]
    score = np.mean(fold_scores)
    logger.info("score of %s is %r", step_identifier, score)
    return score
评论列表
文章目录