run_model.py 文件源码-python代码片段

def k_fold_validation(model, monitored_data, unmonitored_data, k, random_state=123):
    """
    Run stratified k-fold cross-validation and collect one evaluation per fold.

    For every split the model is fitted on the training portion, asked to predict the
    held-out portion, and the prediction is scored with `scoring_methods.evaluate_model`.
    Progress messages and each fold's evaluation are printed as the folds run.

    Note that `fit` is called on the same `model` object in every fold; no copy is made here.

    @param model is a machine learning model that has the functions `fit(X, y)` and `predict(X)`
    @param monitored_data an array-like matrix that has the following structure `[(features, value)]`
    @param unmonitored_data is also an array-like object: `[features]`
    @param k is the number of folds (passed to `StratifiedKFold` as `n_splits`)
    @param random_state seed handed to `StratifiedKFold`, which shuffles before splitting

    @return a list with one entry per fold, in fold order; each entry is whatever
            `scoring_methods.evaluate_model` returns (presumably a `{scoring_method: score}`
            mapping -- confirm against that module)
    """
    # get_X_y is defined elsewhere; the results must support indexing by the index
    # arrays that StratifiedKFold yields (e.g. numpy arrays) -- TODO confirm.
    features, labels = get_X_y(monitored_data, unmonitored_data)
    splitter = StratifiedKFold(n_splits=k, random_state=random_state, shuffle=True)

    fold_scores = []
    # Folds are numbered from 1 in the progress output.
    for fold_number, (train_idx, test_idx) in enumerate(splitter.split(features, labels), start=1):
        print("Starting split {}".format(fold_number))
        train_features, test_features = features[train_idx], features[test_idx]
        train_labels, test_labels = labels[train_idx], labels[test_idx]

        print("Fitting data")
        model.fit(train_features, train_labels)

        print("Predicting")
        predicted = model.predict(test_features)

        fold_result = scoring_methods.evaluate_model(predicted, test_labels)
        fold_scores.append(fold_result)

        print(fold_result)

    return fold_scores