import time
from collections import defaultdict

import joblib
import numpy as np
from sklearn.model_selection import check_cv

# METRIC, REGRESSION_TASKS, CLASSIFICATION_TASKS and _calculate_score are
# module-level definitions in evaluation.py (not shown in this excerpt).


def evaluate_estimator(datafile, estimator, task, metric=None, logger=None):
    if metric is not None and metric not in METRIC:
        raise ValueError("Invalid metric")

    def scorer(estimator, X, y):
        # Regression tasks are scored on raw predictions; classification
        # tasks on class probabilities.
        if task in REGRESSION_TASKS:
            y_pr = estimator.predict(X)
        elif task in CLASSIFICATION_TASKS:
            # Assumes the wrapped estimator's predict_proba accepts a
            # batch_size keyword (not part of the standard sklearn API).
            y_pr = estimator.predict_proba(X, batch_size=1000)
        else:
            raise NotImplementedError(task)
        return _calculate_score(y, y_pr, task, metric)

    eval_s = time.time()
    # The data file is a joblib pickle; mmap_mode='r' memory-maps the
    # contained arrays instead of loading them eagerly.
    data_pkl = joblib.load(datafile, mmap_mode='r')
    resampling = data_pkl['resampling']

    if resampling == 'holdout':
        # Fit on the training split, score on the held-out validation split.
        X_tr, y_tr = data_pkl["X"], data_pkl["y"]
        X_val, y_val = data_pkl["valid_X"], data_pkl["valid_y"]
        estimator.fit(X_tr, y_tr)
        score = scorer(estimator, X_val, y_val)
    elif resampling == 'cv':
        X, y = data_pkl["X"], data_pkl["y"]
        # check_cv returns a (Stratified)KFold splitter; None selects the
        # default number of folds.
        cv = check_cv(None, y, classifier=(task in CLASSIFICATION_TASKS))
        # With no explicit metric, scorer returns a dict of per-metric
        # values, so collect one list per metric; otherwise collect floats.
        score = defaultdict(list) if metric is None else []
        for train, test in cv.split(X, y):
            X_tr, X_val = X[train], X[test]
            y_tr, y_val = y[train], y[test]
            estimator.fit(X_tr, y_tr)
            score_ = scorer(estimator, X_val, y_val)
            if metric is None:
                for m in score_:
                    score[m].append(score_[m])
            else:
                score.append(score_)
        # Average the per-fold scores.
        if metric is None:
            for m in score:
                score[m] = np.mean(score[m])
        else:
            score = np.mean(score)
        # Refit on the full data set so the returned estimator is usable.
        estimator.fit(X, y)
    else:
        raise NotImplementedError(resampling)

    eval_e = time.time()
    if logger:
        logger.debug("Evaluation done, score: %s | %s sec\n%s",
                     score, eval_e - eval_s, estimator)
    return score
Source code from evaluation.py
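For context, here is a minimal sketch of how this function might be driven. It is an illustration only: BINARY_CLASSIFICATION stands in for whatever task constant the surrounding module keeps in CLASSIFICATION_TASKS, "accuracy" is assumed to be a key of the module's METRIC dict, and BatchedRF is a hypothetical wrapper added because the scorer passes batch_size to predict_proba, which plain scikit-learn estimators do not accept. The datafile layout (keys X, y, valid_X, valid_y, resampling) follows the code above.

import logging

import joblib
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


class BatchedRF(RandomForestClassifier):
    # evaluate_estimator's scorer calls predict_proba(X, batch_size=1000);
    # standard sklearn estimators lack that keyword, so swallow it here.
    def predict_proba(self, X, batch_size=None):
        return super().predict_proba(X)


# Build a datafile in the layout evaluate_estimator expects: a joblib
# pickle holding the splits plus a 'resampling' flag.
X, y = load_breast_cancer(return_X_y=True)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=0)
joblib.dump(
    {"X": X_tr, "y": y_tr,
     "valid_X": X_val, "valid_y": y_val,
     "resampling": "holdout"},
    "holdout_data.pkl",
)

logging.basicConfig(level=logging.DEBUG)

# BINARY_CLASSIFICATION is a placeholder for a task constant defined
# elsewhere in the module; "accuracy" must exist in METRIC.
score = evaluate_estimator(
    "holdout_data.pkl",
    BatchedRF(random_state=0),
    task=BINARY_CLASSIFICATION,
    metric="accuracy",
    logger=logging.getLogger(__name__),
)

With resampling set to 'cv' instead, the same call would cross-validate on X/y alone and refit the estimator on the full data before returning the averaged score.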