python类LeaveOneOut()的实例源码-面圈网

methods.py 文件源码项目：pydl 作者: rafaeltg 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def get_cv_method(method, **kwargs):
    """Build the cross-validation splitter registered under ``method``.

    The keyword arguments are forwarded to the splitter's constructor, except
    for ``'loo'``, whose ``LeaveOneOut`` is always created without arguments.

    Raises ``AttributeError`` when ``method`` matches none of the known names.
    """
    # Each constructor call is wrapped in a lambda so that a splitter class is
    # only looked up when its name is actually requested.
    factories = (
        ('kfold', lambda: KFold(**kwargs)),
        ('skfold', lambda: StratifiedKFold(**kwargs)),
        ('loo', lambda: LeaveOneOut()),
        ('shuffle_split', lambda: ShuffleSplit(**kwargs)),
        ('split', lambda: TrainTestSplit(**kwargs)),
        ('s_shuffle_split', lambda: StratifiedShuffleSplit(**kwargs)),
        ('time_series', lambda: TimeSeriesSplit(**kwargs)),
    )

    for name, build in factories:
        if method == name:
            return build()

    raise AttributeError('Invalid CV method - %s!' % method)

predict.py 文件源码项目：momoCrawler 作者: njames741 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def _leave_one_out(algr, X, y):
    """Print and return the leave-one-out mean squared error of ``algr``.

    For every sample, ``algr`` is fitted on all the other samples and asked to
    predict the held-out one. The squared errors are summed and divided by
    ``X.shape[0]``.

    Args:
        algr: estimator whose ``fit(X, y)`` returns an object that provides
            ``predict(X)``.
        X: samples; must expose ``shape`` and accept an integer index array.
        y: targets aligned with ``X``; must accept an integer index array.

    Returns:
        The mean squared error as a float.
    """
    loo = LeaveOneOut()
    square_error_sum = 0.0
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = algr.fit(X_train, y_train.ravel())
        predicted_y = model.predict(X_test)
        # The residual is a one-element array; .item() extracts the scalar
        # explicitly instead of relying on float(ndarray), which recent NumPy
        # releases deprecate for arrays with ndim > 0.
        residual = y_test[0] - predicted_y
        square_error_sum += float(residual.item()) ** 2
    mse = square_error_sum / X.shape[0]
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3.
    # NOTE(review): the '?' in the label looks like a mis-encoded separator
    # character; left untouched because it is runtime output.
    print('-----------------------')
    print('Leave One Out?mse  %s' % (mse,))
    print('-----------------------')
    return mse

kgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def cv_LinearRegression_Bias( xM, yV):
    """
    Leave-one-out cross-validation of the bias-only model y = x + bias.

    The first column of xM and of yV is taken as x and y. For every sample,
    the bias is estimated as the mean of (y - x) over all the other samples
    and the held-out value is predicted as x + bias.

    Returns a dict holding the absolute error of every held-out prediction
    ('list'), the median, mean and standard deviation of those errors
    ('median_abs_err', 'mean_abs_err', 'std_abs_err'), a placeholder 'ci'
    and 'yVp'.

    Raises ValueError if the extracted x or y is not 1-dimensional.
    """
    #print( "cv_LinearRegression_None", xM.shape, yV.shape)
    X, y = np.array( xM)[:,0], np.array( yV)[:,0]

    # only 1-dim is allowed for both X and y
    if X.ndim != 1 or y.ndim != 1:
        raise ValueError(
            "Only 1-dim is allowed for both X and y, got ndim %d and %d"
            % (X.ndim, y.ndim))

    loo_c = model_selection.LeaveOneOut()
    loo = loo_c.split( X)

    # Predictions are stored as floats so that X + bias is not truncated when
    # y happens to have an integer dtype.
    yP = y.astype( float)
    for train, test in loo:
        bias = np.mean(y[train] - X[train])
        yP[test] = X[test] + bias

    cv_score_le = np.abs( np.array( y - yP)).tolist()

    # NOTE(review): 'yVp' carries the raw X values rather than the bias
    # corrected predictions yP - confirm that this is intended.
    o_d = {'median_abs_err': np.median( cv_score_le),
           'mean_abs_err': np.mean( cv_score_le),
           'std_abs_err': np.std( cv_score_le), # this can be std(err)
           'list': cv_score_le,
           'ci': "t.b.d",
           'yVp': X.tolist()}

    return o_d

kgrid.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def cv_LinearRegression_Bias( xM, yV):
    """
    Leave-one-out cross-validation of the bias-only model y = x + bias.

    The first column of xM and of yV is taken as x and y. For every sample,
    the bias is estimated as the mean of (y - x) over all the other samples
    and the held-out value is predicted as x + bias.

    Returns a dict holding the absolute error of every held-out prediction
    ('list'), the median, mean and standard deviation of those errors
    ('median_abs_err', 'mean_abs_err', 'std_abs_err'), a placeholder 'ci'
    and 'yVp'.

    Raises ValueError if the extracted x or y is not 1-dimensional.
    """
    #print( "cv_LinearRegression_None", xM.shape, yV.shape)
    X, y = np.array( xM)[:,0], np.array( yV)[:,0]

    # only 1-dim is allowed for both X and y
    if X.ndim != 1 or y.ndim != 1:
        raise ValueError(
            "Only 1-dim is allowed for both X and y, got ndim %d and %d"
            % (X.ndim, y.ndim))

    loo_c = model_selection.LeaveOneOut()
    loo = loo_c.split( X)

    # Predictions are stored as floats so that X + bias is not truncated when
    # y happens to have an integer dtype.
    yP = y.astype( float)
    for train, test in loo:
        bias = np.mean(y[train] - X[train])
        yP[test] = X[test] + bias

    cv_score_le = np.abs( np.array( y - yP)).tolist()

    # NOTE(review): 'yVp' carries the raw X values rather than the bias
    # corrected predictions yP - confirm that this is intended.
    o_d = {'median_abs_err': np.median( cv_score_le),
           'mean_abs_err': np.mean( cv_score_le),
           'std_abs_err': np.std( cv_score_le), # this can be std(err)
           'list': cv_score_le,
           'ci': "t.b.d",
           'yVp': X.tolist()}

    return o_d

signal_extractor.py 文件源码项目：Automatic-feature-extraction-from-signal 作者: VVVikulin 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def basic_quality(self, target, feature_vector):
        """Score ``feature_vector`` against ``target`` with the measure named by ``self.quality``.

        Apart from 'NWP', every measure returns 1 minus a dependence,
        importance or accuracy value computed between the feature and the
        target. 'NWP' returns the smaller of the two
        ``QualityMeasure.calc_nwp`` results.

        Args:
            target: target values, one per sample.
            feature_vector: feature values, same length as ``target``.

        Returns:
            The score for the selected measure, or the string
            'WRONG QUALITY NAME' when ``self.quality`` is not recognised.

        Raises:
            AssertionError: if the two inputs differ in length.
        """
        assert (len(target) == len(feature_vector))
        if self.quality == 'NWP':
            # Targets ordered by the feature value and by the negated feature
            # value; the better (smaller) of the two NWP results is kept.
            sort_data_p = np.array(
                [t for (_, t) in sorted(zip(feature_vector, target), key=lambda pair: pair[0])])
            sort_data_n = np.array(
                [t for (_, t) in sorted(zip(-1.0 * feature_vector, target), key=lambda pair: pair[0])])
            p_nwp = QualityMeasure.calc_nwp(sort_data_p)
            n_nwp = QualityMeasure.calc_nwp(sort_data_n)
            return min(n_nwp, p_nwp)
        if self.quality == 'corrcoef':
            return 1 - abs(np.corrcoef(target, feature_vector)[0][1])
        if self.quality == 'mutual_info':
            m = MINE()
            m.compute_score(target, feature_vector)
            return 1.0 - m.mic()
        if self.quality == 'chi2':
            return 1 - chi2(abs(feature_vector.reshape(len(feature_vector), 1)), target)[0][0]
        if self.quality == 'distcorr':
            return 1 - distcorr(target, feature_vector)
        if self.quality == 'distree':
            # The feature competes with self.random_feature inside a shallow
            # tree; index 0 is the importance of the feature under test.
            data = np.column_stack((feature_vector, self.random_feature))
            clf = DecisionTreeClassifier(max_depth=5,  random_state=0)
            clf.fit(data, target)
            return 1.0 - clf.feature_importances_[0]
        if self.quality == 'knnscore':
            # Leave-one-out accuracy of a k-NN classifier that sees only this
            # single feature, stored as one column.
            data = np.array([feature_vector]).transpose()
            accuracies = []
            for train, test in LeaveOneOut().split(data):
                clf = KNeighborsClassifier()
                clf.fit(data[train], target[train])
                accuracies.append(accuracy_score(target[test], clf.predict(data[test])))
            return 1.0 - np.mean(accuracies)
        # Kept as a string return (rather than an exception) so that existing
        # callers are not broken.
        return 'WRONG QUALITY NAME'

test_split.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def test_nested_cv():
    # Smoke test: a grid search (inner cv) evaluated by cross_val_score
    # (outer cv) must run for every pairing of the splitters below.
    random_state = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = random_state.randint(0, 5, 15)

    splitters = [
        LeaveOneLabelOut(),
        LeaveOneOut(),
        LabelKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_iter=3, random_state=0),
    ]

    for inner_cv, outer_cv in combinations_with_replacement(splitters, 2):
        search = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                              cv=inner_cv)
        cross_val_score(search, X=X, y=y, labels=labels, cv=outer_cv,
                        fit_params={'labels': labels})

test_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_cross_val_predict():
    # cross_val_predict must match a hand-written loop over the folds and
    # return one prediction per sample for several cv / input combinations.
    dataset = load_boston()
    X, y = dataset.data, dataset.target
    cv = KFold()

    est = Ridge()

    # Reference predictions from an explicit loop over the folds
    expected = np.zeros_like(y)
    for train_idx, test_idx in cv.split(X, y):
        est.fit(X[train_idx], y[train_idx])
        expected[test_idx] = est.predict(X[test_idx])

    assert_array_almost_equal(cross_val_predict(est, X, y, cv=cv), expected)

    # Default cv
    preds = cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    # Leave-one-out
    preds = cross_val_predict(est, X, y, cv=LeaveOneOut())
    assert_equal(len(preds), len(y))

    # Sparse input: entries not above the median are zeroed before conversion
    X_sparse = coo_matrix(X * (X > np.median(X)))
    preds = cross_val_predict(est, X_sparse, y)
    assert_array_almost_equal(len(preds), len(y))

    # Estimator fitted without a target
    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        # Yields the same train/test split four times.
        def split(self, X, y=None, labels=None):
            for _ in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

test_split.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def test_cross_validator_with_default_params():
    # Check get_n_splits, 1d-input handling, integer index dtypes and repr
    # for each cross-validator built with (mostly) default parameters.
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_folds=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_folds=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneLabelOut()"
    lopo_repr = "LeavePLabelOut(n_labels=2)"
    ss_repr = ("ShuffleSplit(n_iter=10, random_state=0, test_size=0.1, "
               "train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    # Expected number of splits, in the same order as the cross-validators
    # zipped together below.
    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))
        # Test that train, test indices returned are integers
        # (the second assertion used to re-check `train`, leaving the `test`
        # indices unverified)
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))