def _cv_r0(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge' or 'Lasso'.
    Cross-validation is performed so as to generate prediction output
    for all input molecules.
    """
    print(xM.shape, yV.shape)
    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
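Note that `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in 0.20; most snippets on this page use the old API. A minimal sketch of the equivalent call with the `model_selection` API (the helper name is illustrative):

# Hedged sketch: the same cross-validated prediction with the modern API.
# Assumes scikit-learn >= 0.18; xM, yV, alpha mirror the snippet above.
from sklearn import linear_model
from sklearn.model_selection import KFold, cross_val_predict

def cv_predict_modern(method, xM, yV, alpha, n_splits=5, n_jobs=-1):
    clf = getattr(linear_model, method)(alpha=alpha)
    # The modern KFold takes the number of splits, not the number of samples.
    kf = KFold(n_splits=n_splits, shuffle=True)
    return cross_val_predict(clf, xM, yV, cv=kf, n_jobs=n_jobs)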
def getConfidenceScores(features_train, labels_train, C):
    train_confidence = []
    # Confidence scores for the training data are computed using K-fold cross-validation.
    kfold = KFold(features_train.shape[0], n_folds=10)
    for train_index, test_index in kfold:
        X_train, X_test = features_train[train_index], features_train[test_index]
        y_train, y_test = labels_train[train_index], labels_train[test_index]
        # Train a classifier on the training subset of this fold.
        m = SVM.train(X_train, y_train, c=C, k="linear")
        # Predict confidence scores for the held-out fold and append them to the list.
        conf = m.decision_function(X_test)
        for x in conf:
            train_confidence.append(x)
    return np.array(train_confidence)
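`SVM.train` above is a project-specific wrapper, so the snippet is not self-contained. A hedged sketch of the same K-fold confidence-score pattern with scikit-learn's `SVC` swapped in for the wrapper (an assumption, not the original API):

# Sketch only: sklearn.svm.SVC stands in for the project's SVM wrapper.
import numpy as np
from sklearn.svm import SVC
from sklearn.cross_validation import KFold  # old-style API, as used on this page

def get_confidence_scores(features_train, labels_train, C):
    train_confidence = []
    kfold = KFold(features_train.shape[0], n_folds=10)
    for train_index, test_index in kfold:
        clf = SVC(C=C, kernel="linear")
        clf.fit(features_train[train_index], labels_train[train_index])
        # Signed distances to the separating hyperplane serve as confidences.
        train_confidence.extend(clf.decision_function(features_train[test_index]))
    return np.array(train_confidence)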
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(X.shape[0], 5)
    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # Check that cross-validation returns 5 scores.
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert_equal(len(scores), 5)

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.lda
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)
    accuracy = 0.
    try:
        for train, test in kf:
            lda = sklearn.lda.LDA()
            if len(y.shape) == 1 or y.shape[1] == 1:
                lda.fit(X[train], y[train])
            else:
                lda = OneVsRestClassifier(lda)
                lda.fit(X[train], y[train])
            predictions = lda.predict(X[test])
            accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
        return accuracy / 10
    except LinAlgError as e:
        self.logger.warning("LDA failed: %s Returned NaN instead!" % e)
        return np.NaN
    except ValueError as e:
        self.logger.warning("LDA failed: %s Returned NaN instead!" % e)
        return np.NaN
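The `_calculate` methods in this block all follow one landmarking pattern: 10-fold CV with `StratifiedKFold` for a single-label target, or plain `KFold` plus `OneVsRestClassifier` for a multilabel one, with fold accuracies summed and divided by the fold count. A hedged generic skeleton of that shared pattern (names are illustrative, not from the original project):

# Illustrative sketch of the shared landmarking skeleton (old sklearn API).
import numpy as np
import sklearn.metrics
from sklearn import cross_validation
from sklearn.multiclass import OneVsRestClassifier

def landmark_accuracy(make_estimator, X, y, n_folds=10):
    single_label = len(y.shape) == 1 or y.shape[1] == 1
    if single_label:
        kf = cross_validation.StratifiedKFold(y, n_folds=n_folds)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=n_folds)
    accuracy = 0.
    for train, test in kf:
        estimator = make_estimator()
        if not single_label:
            estimator = OneVsRestClassifier(estimator)
        estimator.fit(X[train], y[train])
        accuracy += sklearn.metrics.accuracy_score(y[test], estimator.predict(X[test]))
    return accuracy / n_folds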
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.naive_bayes
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)
    accuracy = 0.
    for train, test in kf:
        nb = sklearn.naive_bayes.GaussianNB()
        if len(y.shape) == 1 or y.shape[1] == 1:
            nb.fit(X[train], y[train])
        else:
            nb = OneVsRestClassifier(nb)
            nb.fit(X[train], y[train])
        predictions = nb.predict(X[test])
        accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
    return accuracy / 10
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.tree
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)
    accuracy = 0.
    for train, test in kf:
        random_state = check_random_state(42)
        tree = sklearn.tree.DecisionTreeClassifier(random_state=random_state)
        if len(y.shape) == 1 or y.shape[1] == 1:
            tree.fit(X[train], y[train])
        else:
            tree = OneVsRestClassifier(tree)
            tree.fit(X[train], y[train])
        predictions = tree.predict(X[test])
        accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
    return accuracy / 10
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.tree
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)
    accuracy = 0.
    for train, test in kf:
        random_state = check_random_state(42)
        # A decision stump: a depth-1 tree grown over all features.
        node = sklearn.tree.DecisionTreeClassifier(
            criterion="entropy", max_depth=1, random_state=random_state,
            min_samples_split=1, min_samples_leaf=1, max_features=None)
        if len(y.shape) == 1 or y.shape[1] == 1:
            node.fit(X[train], y[train])
        else:
            node = OneVsRestClassifier(node)
            node.fit(X[train], y[train])
        predictions = node.predict(X[test])
        accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
    return accuracy / 10
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.tree
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)
    accuracy = 0.
    for train, test in kf:
        random_state = check_random_state(42)
        # A "random node": a depth-1 tree restricted to a single feature (max_features=1).
        node = sklearn.tree.DecisionTreeClassifier(
            criterion="entropy", max_depth=1, random_state=random_state,
            min_samples_split=1, min_samples_leaf=1, max_features=1)
        if len(y.shape) == 1 or y.shape[1] == 1:
            node.fit(X[train], y[train])
        else:
            node = OneVsRestClassifier(node)
            node.fit(X[train], y[train])
        predictions = node.predict(X[test])
        accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
    return accuracy / 10
def rede_neural(X, y):
    print("Starting neural network training")
    X2 = normalize(X)
    clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh', algorithm='adam', alpha=1e-5,
                        learning_rate='constant', tol=1e-8, learning_rate_init=0.0002,
                        early_stopping=True, validation_fraction=0.2)
    kf = KFold(len(y), n_folds=3)
    i = 0
    for train, test in kf:
        start = time.time()
        i = i + 1
        print("Training run", i)
        # Split the dataset into train and test folds.
        #X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=1)
        X_train, X_test, y_train, y_test = X2[train], X2[test], y[train], y[test]
        # Fit the classifier on this fold.
        clf.fit(X_train, y_train)
        print("score:", clf.score(X_test, y_test), "(", (time.time() - start) / 60.0, "minutes )")
    return clf
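Note that the `algorithm` keyword above only existed in pre-release 0.18 builds of `MLPClassifier`; released versions of scikit-learn name it `solver`. A hedged equivalent of the constructor for current scikit-learn:

# Sketch assuming scikit-learn >= 0.18, where the keyword is `solver`.
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh', solver='adam',
                    alpha=1e-5, learning_rate='constant', tol=1e-8,
                    learning_rate_init=0.0002, early_stopping=True,
                    validation_fraction=0.2)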
Source: a30_pretrained_nets_pipeline_with_additional_data.py (project: KAGGLE_CERVICAL_CANCER_2017, author: ZFTurbo)
def run_cross_validation_create_models(cnn, nfolds, submission_version):
    from sklearn.cross_validation import KFold
    files = glob.glob(INPUT_PATH + "*/*.jpg")
    additional_files = glob.glob(INPUT_PATH_ADD + "*/*.jpg")
    kf = KFold(len(files), n_folds=nfolds, shuffle=True, random_state=get_random_state(cnn))
    num_fold = 0
    sum_score = 0
    print('Len of additional files: {}'.format(len(additional_files)))
    for train_index, test_index in kf:
        num_fold += 1
        print('Start KFold number {} from {}'.format(num_fold, nfolds))
        print('Split train: ', len(train_index))
        print('Split valid: ', len(test_index))
        score = train_single_model(cnn, num_fold, train_index, test_index, files, additional_files, submission_version)
        sum_score += score
    print('Avg loss: {}'.format(sum_score / nfolds))
def cross_validation_accuracy(clf, X, labels, k):
    """
    Compute the average testing accuracy over k folds of cross-validation. You
    can use sklearn's KFold class here (no random seed, and no shuffling
    needed).
    Params:
      clf......A LogisticRegression classifier.
      X........A csr_matrix of features.
      labels...The true labels for each instance in X.
      k........The number of cross-validation folds.
    Returns:
      The average testing accuracy of the classifier
      over each fold of cross-validation.
    """
    # Reference implementation following the docstring (no shuffle, no seed).
    # Assumes sklearn's KFold and accuracy_score plus numpy as np are in scope.
    accuracies = []
    for train_index, test_index in KFold(X.shape[0], n_folds=k):
        clf.fit(X[train_index], labels[train_index])
        accuracies.append(accuracy_score(labels[test_index],
                                         clf.predict(X[test_index])))
    return np.mean(accuracies)
def make_kfold(target, feature):
    preds = []
    kf = KFold(len(target), n_folds=folds, shuffle=True)
    test_numbers = []
    for trains, tests in kf:
        test_numbers.append(tests)
        pred_list = []
        feature_list = word_vec.fit_transform([dict(Counter(feature[train])) for train in trains])
        target_list = [target[train] for train in trains]
        logreg.fit(feature_list, target_list)
        for test in tests:
            feature_dict = defaultdict(int)
            for f in word_vec.get_feature_names():
                feature_dict[f] = 0
            for key, value in dict(Counter(feature[test])).items():
                if key in feature_dict:
                    feature_dict[key] = value
            pred_list.append(feature_dict)
        preds.append(logreg.predict(word_vec.fit_transform(pred_list)))
    return preds, test_numbers
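Calling `word_vec.fit_transform` again on the held-out fold refits the vectorizer; it only reproduces the training columns here because every test dictionary is pre-seeded with the full training vocabulary. A more conventional variant (a suggestion, not the original code) fits on the training fold only and transforms the test fold:

# Sketch assuming word_vec is a DictVectorizer, as its dict inputs suggest.
# transform() drops keys unseen during fit, so the pre-seeding loop above
# becomes unnecessary.
from collections import Counter
from sklearn.feature_extraction import DictVectorizer

word_vec = DictVectorizer()
X_train = word_vec.fit_transform([dict(Counter(feature[i])) for i in trains])
X_test = word_vec.transform([dict(Counter(feature[j])) for j in tests])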
def eval_cv5(model, x, y):
    kf = KFold(len(y), n_folds=5)
    acc = np.array([])
    pre = np.array([])
    rec = np.array([])
    f1 = np.array([])
    for train_index, test_index in kf:
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(x_train, y_train)
        prediction = model.predict(x_test)
        evaluation = get_eval(prediction, y_test)
        acc = np.append(acc, np.array(evaluation[0]))
        pre = np.append(pre, np.array(evaluation[1]))
        rec = np.append(rec, np.array(evaluation[2]))
        f1 = np.append(f1, np.array(evaluation[3]))
    return acc.mean(), pre.mean(), rec.mean(), f1.mean()
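With a modern scikit-learn, the same four averaged metrics can be collected in one call; a hedged sketch assuming a classifier with binary labels:

# Sketch assuming scikit-learn >= 0.19 (cross_validate) and binary labels.
from sklearn.model_selection import cross_validate

def eval_cv5_modern(model, x, y):
    scores = cross_validate(model, x, y, cv=5,
                            scoring=('accuracy', 'precision', 'recall', 'f1'))
    return (scores['test_accuracy'].mean(), scores['test_precision'].mean(),
            scores['test_recall'].mean(), scores['test_f1'].mean())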
def __init__(self, estimator_cls, parameter_grid, score_fns,
             nfolds=10, shuffle=False, seed=None, njobs=1,
             checkpoint_path=None):
    self.estimator_cls = estimator_cls
    self.parameter_grid = parameter_grid
    self.nfolds = nfolds
    self.seed = seed
    assert njobs == 1, "# jobs > 1 not supported."
    self.njobs = njobs
    assert _is_arraylike(score_fns)
    self.score_fns = score_fns
    self.checkpoint_path = checkpoint_path
    self.grid_scores = None
    # No sample count is passed, so KFold here must be a splitter that takes
    # the fold count directly, unlike the old cross_validation.KFold, which
    # requires the number of samples as its first argument.
    self.kf = KFold(n_folds=self.nfolds,
                    shuffle=shuffle,
                    random_state=seed)
def cached_run(steps, X, y):
    step_identifier = ''
    # Split the data.
    n = len(y)
    kf = KFold(n, _n_fold, random_state=_random_state)
    folded_data = [(X[train_index], y[train_index], X[test_index], y[test_index])
                   for train_index, test_index in kf]
    # The last step is the estimator; handle it separately.
    for step in steps[:-1]:
        step_identifier += "/%s" % _step_identifier(step)
        logger.info("Processing %s", step_identifier)
        folded_data = run_step_on_demand(step_identifier, step, folded_data)
    scores = []
    estimator = steps[-1]
    step_identifier += "/%s" % _step_identifier(estimator)
    for (X_train, y_train, X_test, y_test) in folded_data:
        estimator.fit(X_train, y_train)
        scores.append(estimator.score(X_test, y_test))
    score = np.mean(scores)
    logger.info("score of %s is %r", step_identifier, score)
    return score
def k_fold_sample_data_set(x, y, folds):
    """
    This function uses a k-fold approach as a re-sampling strategy.
    :param x: numpy array
        - The train data
    :param y: numpy array
        - The actual value of each data sample
    :param folds: integer
        - The number of folds used to split the data set
    :return: list of lists
        - The training and test samples extracted from the training set
    """
    x_train_list, y_train_list, x_test_list, y_test_list = list(), list(), list(), list()
    try:
        kf = KFold(x.shape[0], n_folds=folds, shuffle=True)
        for train_index, test_index in kf:
            x_train_list.append(x[train_index])
            y_train_list.append(y[train_index])
            x_test_list.append(x[test_index])
            y_test_list.append(y[test_index])
        return x_train_list, y_train_list, x_test_list, y_test_list
    except AttributeError as e:
        print(e.args, "- Please use numpy arrays as inputs")
        exit()
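A hedged usage sketch for the helper above, on synthetic arrays:

# Usage sketch with synthetic data (shapes are illustrative).
import numpy as np

x = np.random.rand(100, 4)
y = np.random.rand(100)
x_trains, y_trains, x_tests, y_tests = k_fold_sample_data_set(x, y, folds=5)
print(len(x_trains), x_trains[0].shape, x_tests[0].shape)  # 5 (80, 4) (20, 4)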
def run_example():
    data, target = _get_data()
    n_folds = 5
    accuracy = 0.0
    for (train_idx, test_idx) in KFold(n=len(data), n_folds=n_folds, shuffle=True):
        train_X = data[train_idx]
        train_y = target[train_idx]
        test_X = data[test_idx]
        test_y = target[test_idx]
        model = SGDClassifier()
        model.fit(train_X, train_y)
        predictions = model.predict(test_X)
        accuracy += accuracy_score(predictions, test_y)
    return accuracy / n_folds
def kfold_train_and_predict(X, Y, classifier, k=5, indices=None, features=None):
    if indices is None:
        indices = np.array(list(range(X.shape[0])))
    if features is None:
        features = np.array(list(range(X.shape[1])))
    kf = cross_validation.KFold(len(indices), n_folds=k)
    accurs = []
    for train, test in kf:
        train_ind = indices[train].astype("int")
        test_ind = indices[test].astype("int")
        classifier.fit(X[train_ind, :][:, features], Y[train_ind])
        accurs += [classifier.score(X[test_ind, :][:, features], Y[test_ind])]
    accurs = np.array(accurs)
    return np.mean(accurs), np.std(accurs)
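A hedged usage sketch, restricting the classifier to a feature subset (the estimator and data are assumptions, not from the original project):

# Usage sketch with an assumed estimator and synthetic data.
import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.rand(200, 10)
Y = np.random.randint(0, 2, size=200)
mean_acc, std_acc = kfold_train_and_predict(X, Y, LogisticRegression(), k=5,
                                            features=np.array([0, 2, 5]))
print("accuracy: %.3f +/- %.3f" % (mean_acc, std_acc))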
def run_model(model, dtrain, predictor_var, target, scoring_method='mean_squared_error'):
    cv_method = KFold(len(dtrain), 5)
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target], cv=cv_method, scoring=scoring_method)
    #print cv_scores, np.mean(cv_scores), np.sqrt((-1)*np.mean(cv_scores))
    dtrain_for_val = dtrain[dtrain['Year'] < 2000]
    dtest_for_val = dtrain[dtrain['Year'] > 1999]
    #cv_method = KFold(len(dtrain_for_val),5)
    #cv_scores_2 = cross_val_score(model,dtrain_for_val[predictor_var],dtrain_for_val[target],cv=cv_method,scoring=scoring_method)
    #print cv_scores_2, np.mean(cv_scores_2)
    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini, dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)
    #print math.sqrt(mean_squared_error(dtest_for_val['Footfall'],pred_for_val))
Source: methods.py (project: South-African-Heart-Disease-data-analysis-using-python, author: khushi4tiwari)
def getTestAndTrainingSet(X, y, K=10):
    N = len(X)
    CV = cross_validation.KFold(N, K, shuffle=True)
    k = 0
    for train_index, test_index in CV:
        # Extract the training and test set for the current CV fold.
        X_train = X[train_index, :]
        y_train = y[train_index, :]
        X_test = X[test_index, :]
        y_test = y[test_index, :]
        k += 1
        if k == K:
            # Only the split from the final fold is returned.
            return (X_train, y_train), (X_test, y_test)
def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds=folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [feature[i] for i in train]
        y = [polarity[i] for i in train]
        LR.fit(scipy.sparse.vstack(x), y)
        test_label = []
        answer_label = [polarity[j] for j in test]
        for j in test:
            query = feature[j]
            if query.shape[1] != len(feature_dict):
                # No matching feature space for this query; record the fallback label.
                test_label.append(-1)
            else:
                test_label.append(int(predict(LR, query)[0]))
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds=folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [feature[i] for i in train]
        y = [polarity[i] for i in train]
        LR.fit(scipy.sparse.vstack(x), y)
        test_label = []
        answer_label = [polarity[j] for j in test]
        for j in test:
            query = feature[j]
            if query.shape[1] != len(feature_dict):
                # No matching feature space for this query; record the fallback score.
                test_label.append(-1)
            else:
                # Presumably the positive-class probability, used as the PR-curve score.
                test_label.append(predict(LR, query)[1][1])
        # Returns the precision-recall curve computed on the first fold only.
        pre, rec, thr = precision_recall_curve(answer_label, test_label)
        return pre, rec, thr
Source: convolutional_sparseFiltering.py (project: hco-experiments, author: zooniverse)
def cross_validate_Softmax(dataFile, X, Y, pooledFile, imageDim, sgd, save=True, n_folds=5):
    from sklearn.cross_validation import KFold
    m = len(np.squeeze(Y))
    CGrid = [0.1, 0.03, 0.01, 0.003, 0.001, 3e-4, 1e-4, 3e-5, 1e-5]
    kf = KFold(m, n_folds=n_folds)
    mean_FoMs = []
    for C in CGrid:
        fold = 1
        FoMs = []
        for train, test in kf:
            print("[+] training Softmax: LAMBDA : %e, fold : %d" % (C, fold))
            prefix = "cv/cv_fold%d" % fold
            FoM, threshold = train_Softmax(C, dataFile, X[train], Y[train], X[test], Y[test],
                                           pooledFile, imageDim, sgd, prefix=prefix)
            FoMs.append(FoM)
            fold += 1
        mean_FoMs.append(np.mean(FoMs))
    best_FoM_index = np.argmin(mean_FoMs)
    print("[+] Best performing classifier: C : %lf" % CGrid[best_FoM_index])
    return CGrid[best_FoM_index]
Source: convolutional_sparseFiltering.py (project: hco-experiments, author: zooniverse)
def cross_validate_SoftMaxOnline(dataFile, X, Y, pooledFile, imageDim, sgd, save=True, n_folds=5):
    from sklearn.cross_validation import KFold
    m = len(np.squeeze(Y))
    CGrid = [10, 3, 1, 0.3, 0.1, 0.03, 0.01, 0.003, 0.001]
    kf = KFold(m, n_folds=n_folds, indices=False)
    mean_FoMs = []
    for C in CGrid:
        fold = 1
        FoMs = []
        for train, test in kf:
            print("[+] training SoftMaxOnline: LAMBDA : %e, fold : %d" % (C, fold))
            prefix = "cv/cv_fold%d" % fold
            FoM, threshold = train_SoftMaxOnline(C, dataFile, X[train], Y[train], X[test], Y[test],
                                                 pooledFile, imageDim, sgd, prefix=prefix)
            FoMs.append(FoM)
            fold += 1
        mean_FoMs.append(np.mean(FoMs))
    best_FoM_index = np.argmin(mean_FoMs)
    print("[+] Best performing classifier: C : %lf" % CGrid[best_FoM_index])
    return CGrid[best_FoM_index]
Source: convolutional_sparseFiltering.py (project: hco-experiments, author: zooniverse)
def cross_validate_linearSVM(dataFile, X, Y, pooledFile, imageDim, sgd, save=True, n_folds=5):
    from sklearn.cross_validation import KFold
    m = len(np.squeeze(Y))
    CGrid = [10, 3, 1, 0.3, 0.1, 0.03, 0.01, 0.003, 0.001]
    kf = KFold(m, n_folds=n_folds, indices=False)
    mean_FoMs = []
    for C in CGrid:
        fold = 1
        FoMs = []
        for train, test in kf:
            print("[+] training linear SVM: C : %e, fold : %d" % (C, fold))
            prefix = "cv/cv_fold%d" % fold
            FoM, threshold = train_linearSVM(C, dataFile, X[train], Y[train], X[test], Y[test],
                                             pooledFile, imageDim, sgd, prefix=prefix)
            FoMs.append(FoM)
            fold += 1
        mean_FoMs.append(np.mean(FoMs))
    best_FoM_index = np.argmin(mean_FoMs)
    print("[+] Best performing classifier: C : %lf" % CGrid[best_FoM_index])
    return CGrid[best_FoM_index]
Source: convolutional_sparseFiltering.py (project: hco-experiments, author: zooniverse)
def cross_validate_Softmax(dataFile, X, Y, pooledFile, imageDim, sgd, save=True, n_folds=5):
    from sklearn.cross_validation import KFold
    m = len(np.squeeze(Y))
    CGrid = [0.1, 0.03, 0.01, 0.003, 0.001, 3e-4, 1e-4, 3e-5, 1e-5]
    kf = KFold(m, n_folds=n_folds, indices=False)
    mean_FoMs = []
    for C in CGrid:
        fold = 1
        FoMs = []
        for train, test in kf:
            print("[+] training Softmax: LAMBDA : %e, fold : %d" % (C, fold))
            prefix = "cv/cv_fold%d" % fold
            FoM, threshold = train_Softmax(C, dataFile, X[train], Y[train], X[test], Y[test],
                                           pooledFile, imageDim, sgd, prefix=prefix)
            FoMs.append(FoM)
            fold += 1
        mean_FoMs.append(np.mean(FoMs))
    best_FoM_index = np.argmin(mean_FoMs)
    print("[+] Best performing classifier: C : %lf" % CGrid[best_FoM_index])
    return CGrid[best_FoM_index]
Source: convolutional_sparseFiltering.py (project: hco-experiments, author: zooniverse)
def cross_validate_linearSVM(dataFile, X, Y, pooledFile, imageDim, sgd, save=True, n_folds=5):
    from sklearn.cross_validation import KFold
    m = len(np.squeeze(Y))
    CGrid = [10, 3, 1, 0.3, 0.1, 0.03, 0.01, 0.003, 0.001]
    kf = KFold(m, n_folds=n_folds, indices=False)
    mean_FoMs = []
    for C in CGrid:
        fold = 1
        FoMs = []
        for train, test in kf:
            print("[+] training linear SVM: C : %e, fold : %d" % (C, fold))
            prefix = "cv/cv_fold%d" % fold
            FoM, threshold = train_linearSVM(C, dataFile, X[train], Y[train], X[test], Y[test],
                                             pooledFile, imageDim, sgd, prefix=prefix)
            FoMs.append(FoM)
            fold += 1
        mean_FoMs.append(np.mean(FoMs))
    best_FoM_index = np.argmin(mean_FoMs)
    print("[+] Best performing classifier: C : %lf" % CGrid[best_FoM_index])
    return CGrid[best_FoM_index]
def knn_cv(post_features, post_class, n_folds, n_neighbors, length_dataset=-1):
    if length_dataset == -1:
        length_dataset = len(post_class)
    cv = KFold(n=length_dataset, n_folds=n_folds, shuffle=True)
    train_accuracy = []
    test_accuracy = []
    for train, test in cv:
        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(post_features[train], post_class[train])
        train_accuracy.append(knn.score(post_features[train], post_class[train]))
        test_accuracy.append(knn.score(post_features[test], post_class[test]))
    # return (sum(train_accuracy)/n_folds), (sum(test_accuracy)/n_folds)
    return np.mean(train_accuracy), np.mean(test_accuracy)
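A hedged usage sketch that sweeps the neighbour count with the helper above (the data is synthetic and illustrative; it assumes the same numpy/neighbors imports the snippet uses):

# Usage sketch: compare train/test accuracy across neighbour counts.
import numpy as np

post_features = np.random.rand(300, 8)
post_class = np.random.randint(0, 2, size=300)
for k in (1, 3, 5, 11):
    train_acc, test_acc = knn_cv(post_features, post_class, n_folds=5, n_neighbors=k)
    print("k=%2d train=%.3f test=%.3f" % (k, train_acc, test_acc))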