def run_exp_train_cv(crf, feat_dirs, target_label, n_folds=5, n_jobs=-1):
"""
Run cross-validated experiment on training data
"""
# Collect data for running CRF classifier
train_dir = join(LOCAL_DIR, 'train')
true_iob_dir = join(train_dir, 'iob')
X = collect_features(true_iob_dir, *feat_dirs)
labels_fname = join(train_dir, 'train_labels.pkl')
labels = read_labels(labels_fname)
y_true = labels[target_label]
folds_fname = join(train_dir, 'folds.pkl')
folds = read_folds(folds_fname, n_folds)
# Predict
y_pred = cross_val_predict(crf, X, y_true, cv=folds, verbose=2, n_jobs=n_jobs)
print(flat_classification_report(y_true, y_pred, digits=3, labels=('B', 'I')))
return y_pred
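The folds passed as cv above come from read_folds, i.e. a precomputed sequence of (train, test) index pairs; cross_val_predict accepts any such iterable directly. A minimal self-contained sketch of that pattern with a generic scikit-learn estimator (the dataset and classifier here are illustrative assumptions, not part of the experiment code above):

# Sketch: reusing precomputed folds with cross_val_predict (illustrative data/estimator).
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold, cross_val_predict

X_demo, y_demo = make_classification(n_samples=200, n_features=10, random_state=0)
# Materialize the folds once so every experiment reuses exactly the same splits.
precomputed_folds = list(KFold(n_splits=5, shuffle=True, random_state=0).split(X_demo))
y_hat = cross_val_predict(LogisticRegression(max_iter=1000), X_demo, y_demo,
                          cv=precomputed_folds, n_jobs=-1)
print(classification_report(y_demo, y_hat, digits=3))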
def predict(self, X, y):
"""
Returns a generator containing the predictions for each of the
internal models (using cross_val_predict with cv=12).
Parameters
----------
X : ndarray or DataFrame of shape n x m
A matrix of n instances with m features
y : ndarray or Series of length n
An array or series of target or class values
"""
for model in self.models:
yield cvp(model, X, y, cv=12)
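A hedged sketch of how such a generator of per-model out-of-fold predictions could be built and consumed; the models list, dataset, and scoring step below are assumptions for illustration, not part of the class above:

# Sketch: one out-of-fold prediction array per model, produced lazily.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict
from sklearn.tree import DecisionTreeClassifier

X_demo, y_demo = load_iris(return_X_y=True)
models = [LogisticRegression(max_iter=1000), DecisionTreeClassifier(random_state=0)]
oof_predictions = (cross_val_predict(m, X_demo, y_demo, cv=12) for m in models)
for model, y_hat in zip(models, oof_predictions):
    print(type(model).__name__, accuracy_score(y_demo, y_hat))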
def test_cross_val_predict():
# Make sure it works in cross_val_predict for multiclass.
X, y = load_iris(return_X_y=True)
y = LabelBinarizer().fit_transform(y)
X = StandardScaler().fit_transform(X)
mlp = MLPClassifier(n_epochs=10,
solver_kwargs={'learning_rate': 0.05},
random_state=4567).fit(X, y)
cv = KFold(n_splits=4, random_state=457, shuffle=True)
y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
auc = roc_auc_score(y, y_oos, average=None)
assert np.all(auc >= 0.96)
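The same method='predict_proba' pattern works with any probabilistic classifier; below is a minimal sketch using plain scikit-learn, with LogisticRegression standing in for the library-specific MLPClassifier used in the test above:

# Sketch: out-of-fold class probabilities and per-class ROC AUC (illustrative estimator).
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.preprocessing import LabelBinarizer, StandardScaler

X_demo, y_demo = load_iris(return_X_y=True)
X_demo = StandardScaler().fit_transform(X_demo)
cv_demo = KFold(n_splits=4, shuffle=True, random_state=457)
proba_oof = cross_val_predict(LogisticRegression(max_iter=1000), X_demo, y_demo,
                              cv=cv_demo, method='predict_proba')
y_bin = LabelBinarizer().fit_transform(y_demo)
print(roc_auc_score(y_bin, proba_oof, average=None))  # one AUC value per class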
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
"""
Support vector regression (SVR) is used as the estimator.
Cross-validation is performed so as to generate prediction output for all input molecules.
"""
print(xM.shape, yV.shape)
clf = svm.SVR( **svr_params)
kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def cv( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
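For reference, getattr(linear_model, method) simply resolves to an ordinary scikit-learn estimator class; a stand-alone sketch of the same cross-validated prediction, without the repository-specific plotting helper (jutil.cv_show), on synthetic data:

# Sketch: cv('Ridge', ...) boiled down to plain scikit-learn calls.
from sklearn import linear_model, model_selection
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score

xM_demo, yV_demo = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=0)
clf_demo = getattr(linear_model, 'Ridge')(alpha=0.5)
kf_demo = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
yV_pred_demo = model_selection.cross_val_predict(clf_demo, xM_demo, yV_demo, cv=kf_demo, n_jobs=-1)
print('cross-validated r2:', r2_score(yV_demo, yV_pred_demo))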
def cvLOO( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
n_splits = xM.shape[0]
# print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n = model_selection.KFold( n_splits=n_splits)  # LOO: n_splits equals the number of samples
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
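With n_splits equal to the number of samples, the fold object above amounts to leave-one-out cross-validation; scikit-learn also exposes this directly through LeaveOneOut. A short sketch (synthetic data, Ridge chosen only for illustration):

# Sketch: LeaveOneOut gives the same splits as KFold with n_splits == n_samples (no shuffling).
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import LeaveOneOut, cross_val_predict

xM_demo, yV_demo = make_regression(n_samples=30, n_features=4, noise=0.1, random_state=0)
yV_pred_demo = cross_val_predict(Ridge(alpha=1.0), xM_demo, yV_demo, cv=LeaveOneOut())
print(yV_pred_demo.shape)  # one out-of-fold prediction per sample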
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
"""
Support vector regression (SVR) is used as the estimator.
Cross-validation is performed so as to generate prediction output for all input molecules.
"""
print(xM.shape, yV.shape)
clf = svm.SVR( **svr_params)
kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
kutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
kutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def cv( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
kf_n = kf_n_c.split( xM)
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
kutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def cvLOO( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
n_splits = xM.shape[0]
# print(xM.shape, yV.shape)
clf = getattr( linear_model, method)( alpha = alpha)
kf_n = model_selection.KFold( n_splits=n_splits)  # LOO: n_splits equals the number of samples
yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
kutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def cv_SVR(xM, yV, svr_params, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
"""
Support vector regression (SVR) is used as the estimator.
Cross-validation is performed so as to generate prediction output for all input molecules.
"""
print(xM.shape, yV.shape)
clf = svm.SVR(**svr_params)
kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
kf_n = kf_n_c.split(xM)
yV_pred = model_selection.cross_val_predict(
clf, xM, yV.A1, cv=kf_n, n_jobs=n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show(yV, yV_pred, grid_std=grid_std)
return yV_pred
def _cv_r0(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
print(xM.shape, yV.shape)
clf = getattr(linear_model, method)(alpha=alpha)
kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
kf_n = kf_n_c.split(xM)
yV_pred = model_selection.cross_val_predict(
clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show(yV, yV_pred, grid_std=grid_std)
return yV_pred
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
Return
--------
yV_pred
"""
print(xM.shape, yV.shape)
clf = getattr(linear_model, method)(alpha=alpha)
kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
kf_n = kf_n_c.split(xM)
yV_pred = model_selection.cross_val_predict(
clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show(yV, yV_pred, grid_std=grid_std)
return yV_pred
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
"""
method can be 'Ridge', 'Lasso'
cross validation is performed so as to generate prediction output for all input molecules
"""
n_folds = xM.shape[0]
print(xM.shape, yV.shape)
clf = getattr(linear_model, method)(alpha=alpha)
# print("Note - shuffling is not applied because of LOO.")
kf_n_c = model_selection.KFold(n_splits=n_folds)
kf_n = kf_n_c.split(xM)
yV_pred = model_selection.cross_val_predict(
clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
if graph:
print('The prediction output using cross-validation is given by:')
jutil.cv_show(yV, yV_pred, grid_std=grid_std)
return yV_pred
def cv_Ridge_BIKE(A_list, yV, XX=None, alpha=0.5, n_folds=5, n_jobs=-1, grid_std=None):
"""
Cross-validated prediction with the BIKE Ridge model.
"""
clf = binary_model.BIKE_Ridge(A_list, XX, alpha=alpha)
ln = A_list[0].shape[0]  # ln is the number of molecules.
kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
kf_n = kf_n_c.split(A_list[0])
AX_idx = np.array([list(range(ln))]).T
yV_pred = model_selection.cross_val_predict(
clf, AX_idx, yV, cv=kf_n, n_jobs=n_jobs)
print('The prediction output using cross-validation is given by:')
jutil.cv_show(yV, yV_pred, grid_std=grid_std)
return yV_pred
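AX_idx above passes row indices as the "features", so the estimator can look up the corresponding rows and columns of precomputed kernel matrices inside fit and predict. BIKE_Ridge is repository-specific, but the index-as-X trick itself can be reproduced with standard scikit-learn pieces; the helper class below (KernelRidgeOnIndices) is a hypothetical illustration, not part of the original code:

# Sketch: cross-validating on a precomputed kernel by passing sample indices as X.
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import cross_val_predict

class KernelRidgeOnIndices(BaseEstimator, RegressorMixin):
    def __init__(self, K=None, alpha=0.5):
        self.K = K          # full precomputed kernel over all samples
        self.alpha = alpha
    def fit(self, X_idx, y):
        self.train_idx_ = X_idx.ravel()
        self.model_ = KernelRidge(alpha=self.alpha, kernel='precomputed')
        self.model_.fit(self.K[np.ix_(self.train_idx_, self.train_idx_)], y)
        return self
    def predict(self, X_idx):
        idx = X_idx.ravel()
        return self.model_.predict(self.K[np.ix_(idx, self.train_idx_)])

rng = np.random.RandomState(0)
X_raw = rng.randn(80, 6)
y_demo = X_raw @ rng.randn(6) + 0.1 * rng.randn(80)
K_full = rbf_kernel(X_raw)                    # kernel matrix for all samples
AX_idx_demo = np.arange(80).reshape(-1, 1)    # indices play the role of X
y_pred_demo = cross_val_predict(KernelRidgeOnIndices(K=K_full, alpha=0.5),
                                AX_idx_demo, y_demo, cv=5)
print(y_pred_demo.shape)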
def test_cross_val_score_predict_labels():
# Check if ValueError (when labels is None) propagates to cross_val_score
# and cross_val_predict
# And also check if labels is correctly passed to the cv object
X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
clf = SVC(kernel="linear")
label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
LabelShuffleSplit()]
for cv in label_cvs:
assert_raise_message(ValueError,
"The labels parameter should not be None",
cross_val_score, estimator=clf, X=X, y=y, cv=cv)
assert_raise_message(ValueError,
"The labels parameter should not be None",
cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
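LeaveOneLabelOut, LeavePLabelOut, LabelKFold and LabelShuffleSplit come from an older scikit-learn API; in current releases the equivalents are the Group* splitters together with the groups argument of cross_val_score / cross_val_predict. A short sketch of the modern form (my reading of the migration, not part of the original test):

# Sketch: group-aware cross-validated prediction with the current API.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GroupKFold, cross_val_predict
from sklearn.svm import SVC

X_demo, y_demo = make_classification(n_samples=20, n_classes=2, random_state=0)
groups_demo = np.repeat(np.arange(5), 4)  # five groups of four samples each
preds_demo = cross_val_predict(SVC(kernel="linear"), X_demo, y_demo,
                               groups=groups_demo, cv=GroupKFold(n_splits=5))
print(preds_demo.shape)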
def predictKFoldKNN(X, y, K=10, kfold=10, selectKBest=0):
"""
Classifies the data using K-nearest neighbors and k-fold CV
:param X: The list of feature vectors
:type X: list
:param y: The list of labels corresponding to the feature vectors
:type y: list
:param K: The number of nearest neighbors to consider in classification
:type K: int
:param kfold: The number of folds in the CV
:type kfold: int
:param selectKBest: The number of best features to select
:type selectKBest: int
:return: An array of predicted classes
"""
try:
# Prepare data
X, y = numpy.array(X), numpy.array(y)
# Define classifier
clf = neighbors.KNeighborsClassifier(n_neighbors=K)
# Select K Best features if enabled
X_new = SelectKBest(chi2, k=selectKBest).fit_transform(X, y) if selectKBest > 0 else X
predicted = cross_val_predict(clf, X_new, y, cv=kfold).tolist()
except Exception as e:
prettyPrintError(e)
return []
return predicted
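chi2-based SelectKBest requires non-negative feature values, and note that (as in the function above) fitting the selector on the full dataset before cross-validation leaks some target information into the folds. A minimal usage sketch of the same KNN + feature-selection + cross_val_predict pattern on synthetic non-negative data (the helpers such as prettyPrintError from the original module are omitted):

# Sketch: KNN with SelectKBest(chi2) and 10-fold out-of-fold predictions.
import numpy
from sklearn import neighbors
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict

X_demo, y_demo = make_classification(n_samples=200, n_features=20, random_state=0)
X_demo = numpy.abs(X_demo)  # chi2 needs non-negative features
X_best = SelectKBest(chi2, k=10).fit_transform(X_demo, y_demo)
y_hat = cross_val_predict(neighbors.KNeighborsClassifier(n_neighbors=5),
                          X_best, y_demo, cv=10)
print(accuracy_score(y_demo, y_hat))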
def predictKFoldSVMSSK(X, y, kfold=10, subseqLength=3, selectKBest=0):
"""Classifies the data using Support vector machines with the SSK kernel and k-fold CV
:param X: The list of text documents containing traces
:type X: list
:param y: The labels of documents in 'X'
:type y: list
:param kfold: The number of folds
:type kfold: int (default: 10)
:param subseqLength: Length of subsequence used by the SSK
:type subseqLength: int (default: 3)
:param selectKBest: The number of best features to select
:type selectKBest: int
:return: An array of predicted classes
"""
try:
predicted = []
# Retrieve Gram Matrix from string kernel
if verboseON():
prettyPrint("Generating Gram Matrix from documents", "debug")
X_gram = string_kernel(X, X)
y = numpy.array(y)
# Define classifier
clf = svm.SVC(kernel="precomputed")
X_gram_new = SelectKBest(chi2, k=selectKBest).fit_transform(X_gram, y) if selectKBest > 0 else X_gram
prettyPrint("Performing %s-fold CV on the %s best features" % (kfold, selectKBest))
predicted = cross_val_predict(clf, X_gram_new, y, cv=kfold).tolist()
except Exception as e:
prettyPrintError(e)
return []
return predicted
def predictKFoldSVM(X, y, kernel="linear", C=1, selectKBest=0, kfold=10):
"""
Classifies the data using Support vector machines and k-fold CV
:param X: The matrix of feature vectors
:type X: list
:param y: The vector containing the labels corresponding to feature vectors
:type y: list
:param kernel: The kernel used to elevate data into higher dimensionalities
:type kernel: str
:param C: The penalty parameter of the error term
:type C: int
:param selectKBest: The number of best features to select
:type selectKBest: int
:param kfold: The number of folds to use in K-fold CV
:type kfold: int
:return: A list of predicted labels across the k-folds
"""
try:
# Prepare data
X, y = numpy.array(X), numpy.array(y)
# Define classifier
clf = svm.SVC(kernel=kernel, C=C)
# Select K Best features if enabled
X_new = SelectKBest(chi2, k=selectKBest).fit_transform(X, y) if selectKBest > 0 else X
predicted = cross_val_predict(clf, X_new, y, cv=kfold).tolist()
except Exception as e:
prettyPrintError(e)
return []
return predicted
def predictKFoldRandomForest(X, y, estimators=10, criterion="gini", maxdepth=None, selectKBest=0, kfold=10):
"""
Classifies the data using random forests and k-fold CV
:param X: The matrix of feature vectors
:type X: list
:param y: The vector containing labels corresponding to the feature vectors
:type y: list
:param estimators: The number of random trees to use in classification
:type estimators: int
:param criterion: The splitting criterion employed by the decision tree
:type criterion: str
:param maxdepth: The maximum depth each tree is allowed to grow
:type maxdepth: int
:param selectKBest: The number of best features to select
:type selectKBest: int
:param kfold: The number of folds to use in K-fold CV
:type kfold: int
:return: A list of predicted labels across the k-folds
"""
try:
# Prepare data
X, y = numpy.array(X), numpy.array(y)
# Define classifier
clf = ensemble.RandomForestClassifier(n_estimators=estimators, criterion=criterion, max_depth=maxdepth)
X_new = SelectKBest(chi2, k=selectKBest).fit_transform(X, y) if selectKBest > 0 else X
predicted = cross_val_predict(clf, X_new, y, cv=kfold).tolist()
except Exception as e:
prettyPrintError(e)
return []
return predicted
def test_cross_val_predict():
"""Make sure it works in cross_val_predict."""
X, y = load_iris(return_X_y=True)
X = StandardScaler().fit_transform(X)
clf = FMClassifier(rank=2, solver='L-BFGS-B', random_state=4567).fit(X, y)
cv = KFold(n_splits=4, random_state=457, shuffle=True)
y_oos = cross_val_predict(clf, X, y, cv=cv, method='predict')
acc = accuracy_score(y, y_oos)
assert acc >= 0.90, "accuracy is too low for iris in cross_val_predict!"
def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_splits = 5, n_jobs = -1, grid_std = None):
clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
ln = A_list[0].shape[0] # ln is the number of molecules.
kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
kf_n = kf_n_c.split( A_list[0])
AX_idx = np.array([list(range( ln))]).T
yV_pred = model_selection.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)
print('The prediction output using cross-validation is given by:')
jutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_splits = 5, n_jobs = -1, grid_std = None):
clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
ln = A_list[0].shape[0] # ln is the number of molecules.
kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
kf_n = kf_n_c.split( A_list[0])
AX_idx = np.array([list(range( ln))]).T
yV_pred = model_selection.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)
print('The prediction output using cross-validation is given by:')
kutil.cv_show( yV, yV_pred, grid_std = grid_std)
return yV_pred
def _generate_cross_val_predict_test(X, y, est, pd_est, must_match):
def test(self):
self.assertEqual(
hasattr(est, 'predict'),
hasattr(pd_est, 'predict'))
if not hasattr(est, 'predict'):
return
pd_y_hat = pd_cross_val_predict(pd_est, X, y)
self.assertTrue(isinstance(pd_y_hat, pd.Series))
self.assertTrue(pd_y_hat.index.equals(X.index))
if must_match:
y_hat = cross_val_predict(est, X.as_matrix(), y.values)
np.testing.assert_allclose(pd_y_hat, y_hat)
return test
def test_cross_val_predict():
boston = load_boston()
X, y = boston.data, boston.target
cv = KFold()
est = Ridge()
# Naive loop (should be same as cross_val_predict):
preds2 = np.zeros_like(y)
for train, test in cv.split(X, y):
est.fit(X[train], y[train])
preds2[test] = est.predict(X[test])
preds = cross_val_predict(est, X, y, cv=cv)
assert_array_almost_equal(preds, preds2)
preds = cross_val_predict(est, X, y)
assert_equal(len(preds), len(y))
cv = LeaveOneOut()
preds = cross_val_predict(est, X, y, cv=cv)
assert_equal(len(preds), len(y))
Xsp = X.copy()
Xsp *= (Xsp > np.median(Xsp))
Xsp = coo_matrix(Xsp)
preds = cross_val_predict(est, Xsp, y)
assert_array_almost_equal(len(preds), len(y))
preds = cross_val_predict(KMeans(), X)
assert_equal(len(preds), len(y))
class BadCV():
def split(self, X, y=None, labels=None):
for i in range(4):
yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])
assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())
def test_cross_val_predict_input_types():
iris = load_iris()
X, y = iris.data, iris.target
X_sparse = coo_matrix(X)
multioutput_y = np.column_stack([y, y[::-1]])
clf = Ridge(fit_intercept=False, random_state=0)
# 3 fold cv is used --> at least 3 samples per class
# Smoke test
predictions = cross_val_predict(clf, X, y)
assert_equal(predictions.shape, (150,))
# test with multioutput y
predictions = cross_val_predict(clf, X_sparse, multioutput_y)
assert_equal(predictions.shape, (150, 2))
predictions = cross_val_predict(clf, X_sparse, y)
assert_array_equal(predictions.shape, (150,))
# test with multioutput y
predictions = cross_val_predict(clf, X_sparse, multioutput_y)
assert_array_equal(predictions.shape, (150, 2))
# test with X and y as list
list_check = lambda x: isinstance(x, list)
clf = CheckingClassifier(check_X=list_check)
predictions = cross_val_predict(clf, X.tolist(), y.tolist())
clf = CheckingClassifier(check_y=list_check)
predictions = cross_val_predict(clf, X, y.tolist())
# test with 3d X
X_3d = X[:, :, np.newaxis]
check_3d = lambda x: x.ndim == 3
clf = CheckingClassifier(check_X=check_3d)
predictions = cross_val_predict(clf, X_3d, y)
assert_array_equal(predictions.shape, (150,))
def test_cross_val_predict_pandas():
# check cross_val_score doesn't destroy pandas dataframe
types = [(MockDataFrame, MockDataFrame)]
try:
from pandas import Series, DataFrame
types.append((Series, DataFrame))
except ImportError:
pass
for TargetType, InputFeatureType in types:
# X dataframe, y series
X_df, y_ser = InputFeatureType(X), TargetType(y2)
check_df = lambda x: isinstance(x, InputFeatureType)
check_series = lambda x: isinstance(x, TargetType)
clf = CheckingClassifier(check_X=check_df, check_y=check_series)
cross_val_predict(clf, X_df, y_ser)
def test_cross_val_predict_sparse_prediction():
# check that cross_val_predict gives same result for sparse and dense input
X, y = make_multilabel_classification(n_classes=2, n_labels=1,
allow_unlabeled=False,
return_indicator=True,
random_state=1)
X_sparse = csr_matrix(X)
y_sparse = csr_matrix(y)
classif = OneVsRestClassifier(SVC(kernel='linear'))
preds = cross_val_predict(classif, X, y, cv=10)
preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10)
preds_sparse = preds_sparse.toarray()
assert_array_almost_equal(preds_sparse, preds)
def fit(self, df_train, df_test):
"""
Computes the drift between the two datasets
Parameters
----------
df_train : pandas dataframe of shape = (n_train, p)
The train set
df_test : pandas dataframe of shape = (n_test, p)
The test set
Returns
-------
self : object
Returns self.
"""
df_train["target"] = 0
df_test["target"] = 1
self.__target = pd.concat((df_train.target, df_test.target),
ignore_index=True)
if self.stratify:
self.__cv = StratifiedKFold(n_splits=self.n_folds,
shuffle=True,
random_state=self.random_state)
else:
self.__cv = KFold(n_splits=self.n_folds,
shuffle=True,
random_state=self.random_state)
X_tmp = pd.concat((df_train, df_test),
ignore_index=True).drop(['target'], axis=1)
self.__pred = cross_val_predict(estimator=self.estimator,
X=X_tmp,
y=self.__target,
cv=self.__cv,
method="predict_proba")[:,1]
del df_train["target"]
del df_test["target"]
self.__fitOK = True
return self
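The fit method above is the standard adversarial-validation recipe: label the training rows 0 and the test rows 1, then take out-of-fold probabilities of belonging to the test set; the closer the resulting ROC AUC is to 0.5, the less drift there is. A self-contained sketch of the same idea with plain scikit-learn (the estimator choice and the AUC-based drift score are assumptions, not taken from the class above):

# Sketch: adversarial validation / covariate-drift check via cross_val_predict.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, cross_val_predict

rng = np.random.RandomState(0)
df_train_demo = pd.DataFrame(rng.normal(0.0, 1.0, size=(300, 4)))
df_test_demo = pd.DataFrame(rng.normal(0.3, 1.0, size=(200, 4)))  # shifted mean -> drift
X_all = pd.concat((df_train_demo, df_test_demo), ignore_index=True)
y_origin = np.r_[np.zeros(len(df_train_demo)), np.ones(len(df_test_demo))]
cv_demo = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
proba_test = cross_val_predict(RandomForestClassifier(n_estimators=100, random_state=0),
                               X_all, y_origin, cv=cv_demo, method="predict_proba")[:, 1]
print("drift score (AUC):", roc_auc_score(y_origin, proba_test))  # ~0.5 means no drift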