python类KFold()的实例源码

jadrian.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__( self, pdr, E_QC = "E_QC", Em = "Em", type_name = "Type", type_l = [1,2,3,4], 
                    disp = False, graph = False):

        # This parameter will be used in the run() function. 
        self.type_l = type_l
        self.disp = disp
        self.graph = graph

        self.xMa = {}
        self.yVa = {}
        # self.kfa = {}
        for type_id in type_l:
            pdr_new = pdr[ pdr[ type_name] == type_id]
            self.xMa[type_id] = np.mat( pdr_new[ E_QC].values).T
            self.yVa[type_id] = np.mat( pdr_new[ Em].values).T
            # kfa[type_id] = cross_validation.KFold( np.shape(yVa[type_id])[0], n_folds=5, shuffle=True)
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def _gs_SVC_r0( xM, yVc, params):
    """
    Since classification is considered, we use yVc which includes digital values 
    whereas yV can include float point values.
    """

    print(xM.shape, yVc.shape)

    clf = svm.SVC()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=5, shuffle=True)
    gs = grid_search.GridSearchCV( clf, params, cv = kf5, n_jobs = -1)

    gs.fit( xM, yVc)

    return gs
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def gs_SVC( xM, yVc, params, n_folds = 5):
    """
    Since classification is considered, we use yVc which includes digital values 
    whereas yV can include float point values.
    """

    print(xM.shape, yVc.shape)

    clf = svm.SVC()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV( clf, params, cv = kf5, n_jobs = -1)

    gs.fit( xM, yVc)

    return gs
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def gs_Ridge_BIKE( A_list, yV, XX = None, alphas_log = (1, -1, 9), n_folds = 5, n_jobs = -1):
    """
    As is a list of A matrices where A is similarity matrix. 
    X is a concatened linear descriptors. 
    If no X is used, X can be empty
    """

    clf = binary_model.BIKE_Ridge( A_list, XX)
    parmas = {'alpha': np.logspace( *alphas_log)}
    ln = A_list[0].shape[0] # ls is the number of molecules.

    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV( clf, parmas, scoring = 'r2', cv = kf_n, n_jobs = n_jobs)

    AX_idx = np.array([list(range( ln))]).T
    gs.fit( AX_idx, yV)

    return gs
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_folds = 5, n_jobs = -1, grid_std = None):

    clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
    ln = A_list[0].shape[0] # ls is the number of molecules.
    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)

    AX_idx = np.array([list(range( ln))]).T
    yV_pred = cross_validation.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def _gs_SVC_r0( xM, yVc, params):
    """
    Since classification is considered, we use yVc which includes digital values 
    whereas yV can include float point values.
    """

    print(xM.shape, yVc.shape)

    clf = svm.SVC()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=5, shuffle=True)
    gs = grid_search.GridSearchCV( clf, params, cv = kf5, n_jobs = -1)

    gs.fit( xM, yVc)

    return gs
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def gs_Ridge_BIKE( A_list, yV, XX = None, alphas_log = (1, -1, 9), n_folds = 5, n_jobs = -1):
    """
    As is a list of A matrices where A is similarity matrix. 
    X is a concatened linear descriptors. 
    If no X is used, X can be empty
    """

    clf = binary_model.BIKE_Ridge( A_list, XX)
    parmas = {'alpha': np.logspace( *alphas_log)}
    ln = A_list[0].shape[0] # ls is the number of molecules.

    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV( clf, parmas, scoring = 'r2', cv = kf_n, n_jobs = n_jobs)

    AX_idx = np.array([list(range( ln))]).T
    gs.fit( AX_idx, yV)

    return gs
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def gs_BIKE_Ridge( A_list, yV, alphas_log = (1, -1, 9), X_concat = None, n_folds = 5, n_jobs = -1):
    """
    As is a list of A matrices where A is similarity matrix. 
    X is a concatened linear descriptors. 
    If no X is used, X can be empty
    """

    clf = binary_model.BIKE_Ridge( A_list, X_concat)
    parmas = {'alpha': np.logspace( *alphas_log)}
    ln = A_list[0].shape[0] # ls is the number of molecules.

    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV( clf, parmas, scoring = 'r2', cv = kf_n, n_jobs = n_jobs)

    AX_idx = np.array([list(range( ln))]).T
    gs.fit( AX_idx, yV)

    return gs
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _cv_LOO_r0( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
base.py 文件源码 项目:stacking 作者: ikki407 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def create_cv_id(target, n_folds_ = 5, cv_id_name=cv_id_name, seed=407):
    try:
        a = StratifiedKFold(target['target'],n_folds=n_folds_, shuffle=True, random_state=seed)
        cv_index = a.test_folds
        print 'Done StratifiedKFold'
    except:
        cv_index = np.empty(len(target))
        a = KFold(len(target),n_folds=n_folds_, shuffle=True, random_state=seed)
        for idx, i in enumerate(a):
            cv_index[i[1]] = idx
        cv_index = cv_index.astype(int)
        print 'Done Kfold'

    np.save(INPUT_PATH + cv_id_name, cv_index)
    return 

######### Utils #########

#feature list????????????util??
test_cross_validation.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_kfold_no_shuffle():
    # Manually check that KFold preserves the data ordering on toy datasets
    splits = iter(cval.KFold(4, 2))
    train, test = next(splits)
    assert_array_equal(test, [0, 1])
    assert_array_equal(train, [2, 3])

    train, test = next(splits)
    assert_array_equal(test, [2, 3])
    assert_array_equal(train, [0, 1])

    splits = iter(cval.KFold(5, 2))
    train, test = next(splits)
    assert_array_equal(test, [0, 1, 2])
    assert_array_equal(train, [3, 4])

    train, test = next(splits)
    assert_array_equal(test, [3, 4])
    assert_array_equal(train, [0, 1, 2])
test_cross_validation.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_predefinedsplit_with_kfold_split():
    # Check that PredefinedSplit can reproduce a split generated by Kfold.
    folds = -1 * np.ones(10)
    kf_train = []
    kf_test = []
    for i, (train_ind, test_ind) in enumerate(cval.KFold(10, 5, shuffle=True)):
        kf_train.append(train_ind)
        kf_test.append(test_ind)
        folds[test_ind] = i
    ps_train = []
    ps_test = []
    ps = cval.PredefinedSplit(folds)
    for train_ind, test_ind in ps:
        ps_train.append(train_ind)
        ps_test.append(test_ind)
    assert_array_equal(ps_train, kf_train)
    assert_array_equal(ps_test, kf_test)
facenet.py 文件源码 项目:facerecognition 作者: guoxiaolu 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds,nrof_thresholds))
    fprs = np.zeros((nrof_folds,nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs,0)
    fpr = np.mean(fprs,0)
    return tpr, fpr, accuracy
facenet.py 文件源码 项目:facerecognition 作者: guoxiaolu 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
splittrainvalidation.py 文件源码 项目:Supply-demand-forecasting 作者: LevinJ 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def get_kfold_bydate(self, df, n_folds = 10):
        df.sort_values(by = ['time_date','time_id','start_district_id'], axis = 0, inplace = True)
        df.reset_index(drop=True, inplace = True)
        kf = KFold(df.shape[0], n_folds= n_folds, shuffle=False)
        for train_index, test_index in kf:
            print("TRAIN:", train_index, "TEST:", test_index)
        return kf
test_big.py 文件源码 项目:skutil 作者: tgsmith61591 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def test_large_grid():
        """In this test, we purposely overfit a RandomForest to completely random data
        in order to assert that the test error will far supercede the train error.
        """

        if not SK18:
            custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
        else:
            custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

        # define the pipe
        pipe = Pipeline([
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42))
        ])

        # define hyper parameters
        hp = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15)
        }

        # define the grid
        grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)

        # this will fail because we haven't fit yet
        assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

        # fit the grid
        grid.fit(X_train, y_train)

        # score for coverage -- this might warn...
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grid.score(X_train, y_train)

        # coverage:
        assert grid._estimator_type == 'classifier'

        # get predictions
        tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

        # evaluate score (SHOULD be better than random...)
        accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

        # grid score reports:
        # assert fails for bad percentile
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})

        # assert fails for bad y_axis
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})

        # assert passes otherwise
        report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
multiuser.py 文件源码 项目:musm-adt17 作者: stefanoteso 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def crossvalidate(problem, dataset, set_size, uid, w, var, cov,
                  transform, old_alpha, lmbda=0.5):
    """Finds the best hyperparameters using cross-validation.

    Parameters
    ----------
    WRITEME

    Returns
    -------
    alpha : tuple
        The best hyperparameter.
    """

    if len(dataset) % _NUM_FOLDS != 0:
        return old_alpha

    kfold = KFold(len(dataset), n_folds=_NUM_FOLDS)
    f = compute_transform(uid, w, var, cov, transform, lmbda=lmbda)

    avg_accuracy = np.zeros(len(_ALPHAS))
    for i, alpha in enumerate(_ALPHAS):
        accuracies = []
        for tr_indices, ts_indices in kfold:
            w, _ = problem.select_query(dataset[tr_indices], set_size, alpha,
                                        transform=f)
            utilities = np.dot(w, dataset[ts_indices].T)
            accuracies.append((utilities > 0).mean())
        avg_accuracy[i] = sum(accuracies) / len(accuracies)

    alpha = _I_TO_ALPHA[np.argmax(avg_accuracy)]

    _LOG.debug('''\
            alpha accuracies = {avg_accuracy}
            best alpha = {alpha}
        ''', **locals())

    return alpha
utils.py 文件源码 项目:text-classification-with-convnets 作者: osmanbaskaya 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cross_validate(model, X, y, n_folds, batch_size, num_epoch, func_for_evaluation=None):

    # let's shuffle first.
    seed = 5
    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed(seed)
    np.random.shuffle(y)

    X = np.array(X)
    y = np.array(y)

    scores = np.zeros(n_folds)
    kf = KFold(len(y), n_folds=n_folds)
    for i, (train_index, test_index) in enumerate(kf):
        X_train, y_train = X[train_index, :], y[train_index]
        X_test, y_test = X[test_index, :], y[test_index]
        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  nb_epoch=num_epoch)

        predictions = model.predict(X_test)
        score = func_for_evaluation(predictions[:, 0].tolist(), y_test)
        try:
            scores[i] = score[0]
        except IndexError:
            scores[i] = score


    print "{}-Fold cross validation score: {}".format(n_folds, scores.mean())


问题


面经


文章

微信
公众号

扫码关注公众号