def cross_validation():
    M = read_dataset()
    n_fold = 10
    rating_idx = np.array(M.nonzero()).T
    # shuffle=True is required for random_state to take effect
    # (scikit-learn >= 0.24 raises a ValueError otherwise).
    kf = KFold(n_splits=n_fold, shuffle=True, random_state=0)

    with tf.Session() as sess:
        model = VAEMF(sess, num_user, num_item,
                      hidden_encoder_dim=hidden_encoder_dim,
                      hidden_decoder_dim=hidden_decoder_dim,
                      latent_dim=latent_dim, output_dim=output_dim,
                      learning_rate=learning_rate, batch_size=batch_size,
                      reg_param=reg_param)

        for i, (train_idx, test_idx) in enumerate(kf.split(rating_idx)):
            print("{0}/{1} Fold start| Train size={2}, Test size={3}".format(
                i + 1, n_fold, train_idx.size, test_idx.size))
            model.train(M, train_idx=train_idx, test_idx=test_idx, n_steps=n_steps)
Example source code for the Python KFold() class
# Same as above, but the model is built with one_hot enabled.
def cross_validation():
    M = read_dataset()
    n_fold = 10
    rating_idx = np.array(M.nonzero()).T
    # shuffle=True is required for random_state to take effect.
    kf = KFold(n_splits=n_fold, shuffle=True, random_state=0)

    with tf.Session() as sess:
        model = VAEMF(sess, num_user, num_item,
                      hidden_encoder_dim=hidden_encoder_dim,
                      hidden_decoder_dim=hidden_decoder_dim,
                      latent_dim=latent_dim, output_dim=output_dim,
                      learning_rate=learning_rate, batch_size=batch_size,
                      reg_param=reg_param, one_hot=one_hot)

        for i, (train_idx, test_idx) in enumerate(kf.split(rating_idx)):
            print("{0}/{1} Fold start| Train size={2}, Test size={3}".format(
                i + 1, n_fold, train_idx.size, test_idx.size))
            model.train(M, train_idx=train_idx, test_idx=test_idx, n_steps=n_steps)
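Both variants above split the indices of the observed ratings rather than users or items, so each fold holds out a random subset of known matrix entries. A minimal self-contained sketch of that pattern, assuming only numpy and scikit-learn (the toy matrix and fold count are illustrative, not from the original project):

import numpy as np
from sklearn.model_selection import KFold

# Toy 4x3 rating matrix; zeros are unobserved entries.
M = np.array([[5, 0, 3],
              [4, 0, 0],
              [0, 2, 1],
              [1, 0, 4]])

# One (row, col) pair per observed rating, shape (n_ratings, 2).
rating_idx = np.array(M.nonzero()).T

kf = KFold(n_splits=3, shuffle=True, random_state=0)
for fold, (train_idx, test_idx) in enumerate(kf.split(rating_idx), start=1):
    held_out = rating_idx[test_idx]  # entries hidden from training this fold
    print("Fold %d holds out entries: %s" % (fold, held_out.tolist()))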
def kfold_train(self, n_splits=3):
    logger.info('train classifier using kFold')
    kf = KFold(n_splits=n_splits, shuffle=True)
    scores = []
    precisions = []
    recalls = []
    for train_index, test_index in kf.split(self.data):
        train_text = self.data.iloc[train_index]['text'].values
        train_y = self.data.iloc[train_index]['class'].values
        test_text = self.data.iloc[test_index]['text'].values
        test_y = self.data.iloc[test_index]['class'].values
        self.cls.train(train_text, train_y)
        predictions = self.cls.predict(test_text)
        self.confusion += confusion_matrix(test_y, predictions)
        scores.append(f1_score(test_y, predictions, pos_label='geography'))
        recalls.append(recall_score(test_y, predictions, pos_label='geography'))
        precisions.append(precision_score(test_y, predictions, pos_label='geography'))
    self.score = sum(scores) / len(scores)
    self.precision = sum(precisions) / len(precisions)
    self.recall = sum(recalls) / len(recalls)
    return self.cls
def __init__(self, name, X, y, task, test_size=None, cv=None, random_state=42):
    self.name = name
    self.X = X
    self.y = y
    self.task = task
    self.random_state = random_state
    if test_size is not None:
        self.test_size = test_size
        self.validation_method = "train_test_split"
        self.X_train, self.X_test, self.y_train, self.y_test = \
            model_selection.train_test_split(self.X, self.y, test_size=test_size,
                                             random_state=random_state)
    elif cv is not None:
        self.validation_method = "cv"
        if task == "regression":
            # shuffle=True so that random_state actually takes effect
            # (scikit-learn raises otherwise).
            self.kfold = model_selection.KFold(n_splits=cv, shuffle=True,
                                               random_state=random_state)
        elif task == "classification":
            self.kfold = model_selection.StratifiedKFold(n_splits=cv, shuffle=True,
                                                         random_state=random_state)
def set_kfold(self, no_folds=10, fold_id=0):
    inst = KFold(n_splits=no_folds, shuffle=True, random_state=125)
    self.fold_id = fold_id
    self.KFolds = list(inst.split(np.arange(self.no_samples)))
    self.train_idx, self.test_idx = self.KFolds[fold_id]
    self.no_samples_train = self.train_idx.shape[0]
    self.no_samples_test = self.test_idx.shape[0]
    self.print_ext('Data ready. no_samples_train:', self.no_samples_train,
                   'no_samples_test:', self.no_samples_test)
    if self.train_batch_size == 0:
        self.train_batch_size = self.no_samples_train
    if self.test_batch_size == 0:
        self.test_batch_size = self.no_samples_test
    self.train_batch_size = min(self.train_batch_size, self.no_samples_train)
    self.test_batch_size = min(self.test_batch_size, self.no_samples_test)
# In the following function, each sample is cropped/sliced before batching
# to improve performance.
# Source: a25_unet_training_v2_on_boxes.py (project: KAGGLE_CERVICAL_CANCER_2017, author: ZFTurbo)
def run_cross_validation_create_models_unet2(nfolds=5):
    from sklearn.model_selection import KFold
    files_full = glob.glob(INPUT_PATH + "*/*.png")
    files = []
    for f in files_full:
        # Keep only the mask images.
        if '_mask' not in f:
            continue
        files.append(f)
    kf = KFold(n_splits=nfolds, shuffle=True, random_state=66)
    num_fold = 0
    sum_score = 0
    for train_index, test_index in kf.split(range(len(files))):
        num_fold += 1
        print('Start KFold number {} from {}'.format(num_fold, nfolds))
        print('Split train: ', len(train_index))
        print('Split valid: ', len(test_index))
        # Only fold 2 is actually trained; all other folds are skipped.
        if num_fold != 2:
            continue
        score = train_single_model(num_fold, train_index, test_index, files)
        sum_score += score
    # Note: only one fold contributes to sum_score, so dividing by nfolds
    # underestimates the average.
    print('Avg loss: {}'.format(sum_score / nfolds))
# Source: a25_unet_training_v1_on_my_segmentation.py (project: KAGGLE_CERVICAL_CANCER_2017, author: ZFTurbo)
def run_cross_validation_create_models_unet1(nfolds=5):
    from sklearn.model_selection import KFold
    files_full = glob.glob(INPUT_PATH + "*/*.png")
    files = []
    for f in files_full:
        # Keep only the original images (masks are filtered out).
        if '_mask' in f:
            continue
        files.append(f)
    kf = KFold(n_splits=nfolds, shuffle=True, random_state=66)
    num_fold = 0
    sum_score = 0
    for train_index, test_index in kf.split(range(len(files))):
        num_fold += 1
        print('Start KFold number {} from {}'.format(num_fold, nfolds))
        print('Split train: ', len(train_index))
        print('Split valid: ', len(test_index))
        score = train_single_model(num_fold, train_index, test_index, files)
        sum_score += score
    print('Avg loss: {}'.format(sum_score / nfolds))
def model_cross_valid(X, Y):
    seed = 7
    # shuffle=True so that random_state takes effect.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

    def build_model(model_name):
        model = model_name()
        return model

    scoring = 'neg_mean_squared_error'
    # TODO: add random forest, boosting, LSTM, and GBDT models.
    for model_name in [LinearRegression, ElasticNet]:
        # for model_name in [LinearRegression, Ridge, Lasso, ElasticNet,
        #                    KNeighborsRegressor, DecisionTreeRegressor, SVR,
        #                    RandomForestRegressor, AdaBoostRegressor,
        #                    GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
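A hypothetical invocation of model_cross_valid on a small built-in regression dataset; load_diabetes is only an illustrative stand-in for whatever data the original project used:

from sklearn.datasets import load_diabetes

X, Y = load_diabetes(return_X_y=True)
# Prints each model class and its mean negative MSE across the 10 folds.
model_cross_valid(X, Y)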
def cross_validate(classifier, n_folds=5):
    '''Custom cross-validation module I always use.'''
    train_X = classifier['train_X']
    train_y = classifier['train_y']
    model = classifier['model']
    score = 0.0
    kf = KFold(n_splits=n_folds)
    for train_index, test_index in kf.split(train_X):
        X_train, X_test = train_X[train_index], train_X[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]
        clf = model.fit(X_train, y_train)
        pred = clf.predict_proba(X_test)[:, 1]
        # print('cross', roc_auc_score(y_test, pred))
        score = score + roc_auc_score(y_test, pred)
    return score / n_folds
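The function expects a plain dict bundling the training data and an estimator that exposes predict_proba. A hypothetical usage sketch (the random data and the LogisticRegression choice are illustrative assumptions):

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
classifier = {
    'train_X': rng.randn(100, 5),            # features
    'train_y': rng.randint(0, 2, size=100),  # binary labels
    'model': LogisticRegression(),           # any estimator with predict_proba
}
mean_auc = cross_validate(classifier, n_folds=5)
print('Mean ROC AUC across folds: %.3f' % mean_auc)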
def fit(self, X, y):
    self.base_models_ = [list() for x in self.base_models]
    self.meta_model_ = clone(self.meta_model)
    kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=15)

    # Train cloned base models, then create the out-of-fold predictions
    # that are needed to train the cloned meta-model.
    out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
    for i, model in enumerate(self.base_models):
        for train_index, holdout_index in kfold.split(X, y):
            instance = clone(model)
            self.base_models_[i].append(instance)
            instance.fit(X[train_index], y[train_index])
            y_pred = instance.predict(X[holdout_index])
            out_of_fold_predictions[holdout_index, i] = y_pred

    # Now train the cloned meta-model using the out-of-fold predictions as new features.
    self.meta_model_.fit(out_of_fold_predictions, y)
    return self
# Do the predictions of all base models on the test data and use the averaged
# predictions as meta-features for the final prediction, which is done by the meta-model.
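The comment above describes the companion predict step of this stacking pattern. A minimal sketch of what such a method could look like, assuming self.base_models_ holds the per-fold fitted clones created in fit (this is an illustrative reconstruction, not code from the original source):

def predict(self, X):
    # For each base model, average the predictions of its per-fold clones,
    # yielding one meta-feature column per base model.
    meta_features = np.column_stack([
        np.column_stack([instance.predict(X) for instance in fold_instances]).mean(axis=1)
        for fold_instances in self.base_models_
    ])
    # The meta-model maps the averaged base predictions to the final output.
    return self.meta_model_.predict(meta_features)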
def train_cross_validation(args, sess, model, phi_xs_train, ys_train):
    kf = KFold(n_splits=args.K)
    w_best = None
    validation_loss = 0
    for train_index, validation_index in kf.split(phi_xs_train):
        sess.run(tf.global_variables_initializer())
        model.fit(sess, phi_xs_train[train_index], ys_train[train_index],
                  epoch=args.epoch, batch_size=args.batch_size)
        loss = model.eval(sess, phi_xs_train[validation_index], ys_train[validation_index])
        logging.info('Validation loss = %f' % loss)
        validation_loss += loss
        model.reset(sess)
    return validation_loss / float(args.K)
def evaluate(self, individual):
    # print(" *** evaluate *** ")
    # model = individual.createNetwork()
    # return random.random(),
    random.seed(42)
    # Perform KFold cross-validation.
    kf = KFold(n_splits=3)
    scores = []
    for train, test in kf.split(self.X):  # train, test are indices
        X_train, X_test = self.X[train], self.X[test]
        y_train, y_test = self.y[train], self.y[test]
        model = individual.createNetwork()
        # nb_epoch is the Keras 1 argument name; Keras 2 renamed it to epochs.
        model.fit(X_train, y_train,
                  batch_size=Config.batch_size, nb_epoch=Config.epochs, verbose=0)
        yy_test = model.predict(X_test)
        scores.append(error(y_test, yy_test))
    fitness = np.mean(scores)
    return fitness,
def kfold(self, k=5, stratify=False, shuffle=True, seed=33):
    """K-Folds cross-validation iterator.

    Parameters
    ----------
    k : int, default 5
    stratify : bool, default False
    shuffle : bool, default True
    seed : int, default 33

    Yields
    ------
    X_train, y_train, X_test, y_test, train_index, test_index
    """
    # random_state is only valid when shuffle=True (scikit-learn raises otherwise).
    random_state = seed if shuffle else None
    if stratify:
        kf = StratifiedKFold(n_splits=k, random_state=random_state, shuffle=shuffle)
    else:
        kf = KFold(n_splits=k, random_state=random_state, shuffle=shuffle)
    for train_index, test_index in kf.split(self.X_train, self.y_train):
        X_train, y_train = idx(self.X_train, train_index), self.y_train[train_index]
        X_test, y_test = idx(self.X_train, test_index), self.y_train[test_index]
        yield X_train, y_train, X_test, y_test, train_index, test_index
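Because kfold is a generator, it is consumed fold by fold. A hypothetical usage sketch, where the dataset object ds and some_estimator are placeholder assumptions, not names from the original source:

fold_scores = []
for X_tr, y_tr, X_te, y_te, tr_idx, te_idx in ds.kfold(k=5, stratify=True):
    est = some_estimator()  # hypothetical: any sklearn-style estimator
    est.fit(X_tr, y_tr)
    fold_scores.append(est.score(X_te, y_te))
print('Mean CV accuracy:', sum(fold_scores) / len(fold_scores))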
def do_kfold(proc_images, proc_labels, split=10):
    trainimages = []
    trainlabels = []
    testimages = []
    testlabels = []
    # Shuffle images and labels with the same random permutation.
    rand_idx = random.sample(range(0, len(proc_images)), len(proc_images))
    proc_images = proc_images[rand_idx]
    proc_labels = proc_labels[rand_idx]
    kf = KFold(n_splits=split)
    for train_index, test_index in kf.split(proc_images):
        x_train, x_test = proc_images[train_index], proc_images[test_index]
        y_train, y_test = proc_labels[train_index], proc_labels[test_index]
        trainimages.append(x_train)
        testimages.append(x_test)
        trainlabels.append(y_train)
        testlabels.append(y_test)
    np.save("trainimages.npy", trainimages)
    np.save("testimages.npy", testimages)
    np.save("trainlabels.npy", trainlabels)
    np.save("testlabels.npy", testlabels)
    return (trainimages, testimages, trainlabels, testlabels)
# Source: cnn_expression_bn_dropout.py (project: convneuralnetwork, author: clutariomark)
# do_kfold is identical to the version shown above.
# Source: cnn_expression_batchnorm.py (project: convneuralnetwork, author: clutariomark)
# do_kfold is identical to the version shown above.
# Source: TwitterClassificationProject.py (project: twitter-text-classification, author: FurkanArslan)
def testClassificationQuality(self):
    score = 0
    n_splits = 10
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    tweetClassification = TweetClassification()
    for ind_train, ind_test in kfold.split(self.tweets):
        dataTest = self.tweets[ind_test]
        dataTrain = self.tweets[ind_train]
        targetTest = self.target[ind_test]
        targetTrain = self.target[ind_train]
        tweetClassification.fit(dataTrain, targetTrain)
        score += tweetClassification.score(dataTest, targetTest)
    return score / n_splits
# Source: BenchmarkTests.py (project: twitter-text-classification, author: FurkanArslan)
def evaluate_cross_validation(self, clf, data, target, cluster):
    score = 0
    kfold = KFold(n_splits=cluster, shuffle=True, random_state=0)
    for ind_train, ind_test in kfold.split(data):
        dataTest = data[ind_test]
        dataTrain = data[ind_train]
        targetTest = target[ind_test]
        targetTrain = target[ind_train]
        clf.fit(dataTrain, targetTrain)
        score += clf.score(dataTest, targetTest)
    print('-' * 30)
    # Divide by the actual number of folds rather than a hardcoded 10.
    print("Mean score: %0.3f" % (score / cluster))
    print('-' * 30)
    return score / cluster
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.
    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)
    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)
    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)
    assert np.all(auc >= 0.96)
def gs_numpy(method, X, Y, alphas_log=(-1, 1, 9), n_splits=5, n_jobs=-1, disp=True):
    """
    Grid-search method with numpy arrays X and Y.
    Previously, np.mat was used for compatibility with Matlab notation.
    """
    if disp:
        print(X.shape, Y.shape)
    clf = getattr(linear_model, method)()
    params = {'alpha': np.logspace(*alphas_log)}
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf5_c, n_jobs=n_jobs)
    gs.fit(X, Y)
    return gs
def cv_SVR(xM, yV, svr_params, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross-validation with SVR: prediction output is generated for all input molecules.
    """
    print(xM.shape, yV.shape)
    clf = svm.SVR(**svr_params)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    # kf_n_c (not the undefined kf5_ext_c) provides the fold iterator.
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def gs_classfier(classifier, xM, yVc, params, n_splits=5, n_jobs=-1):
    """
    gs = gs_classfier(classifier, xM, yVc, params, n_splits=5, n_jobs=-1)

    Inputs
    ======
    classifier : e.g. svm.SVC()
    params : e.g. {"C": np.logspace(-2, 2, 5)}
    """
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(classifier, params, cv=kf5_c, n_jobs=n_jobs)
    gs.fit(xM, yVc)
    return gs
def gs_Ridge_BIKE(A_list, yV, XX=None, alphas_log=(1, -1, 9), n_splits=5, n_jobs=-1):
    """
    A_list is a list of similarity matrices A.
    XX is the concatenated linear descriptors; if no linear part is used, XX can be None.
    """
    clf = binary_model.BIKE_Ridge(A_list, XX)
    params = {'alpha': np.logspace(*alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf_n_c, n_jobs=n_jobs)
    AX_idx = np.array([list(range(ln))]).T
    gs.fit(AX_idx, yV)
    return gs
def gs_BIKE_Ridge(A_list, yV, alphas_log=(1, -1, 9), X_concat=None, n_splits=5, n_jobs=-1):
    """
    A_list is a list of similarity matrices A.
    X_concat is the concatenated linear descriptors; if unused, it can be None.
    """
    clf = binary_model.BIKE_Ridge(A_list, X_concat)
    params = {'alpha': np.logspace(*alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf_n_c, n_jobs=n_jobs)
    AX_idx = np.array([list(range(ln))]).T
    gs.fit(AX_idx, yV)
    return gs
def _cv_r0(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge' or 'Lasso'.
    Cross-validation is performed so as to generate prediction output for all input molecules.
    """
    print(xM.shape, yV.shape)
    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    # kf_n_c (not the undefined kf5_ext_c) provides the fold iterator.
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def cvLOO(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge' or 'Lasso'.
    Leave-one-out cross-validation: prediction output is generated for all input molecules.
    """
    n_splits = xM.shape[0]  # one fold per sample gives leave-one-out
    # print(xM.shape, yV.shape)
    clf = getattr(linear_model, method)(alpha=alpha)
    # The old cross_validation.KFold(n, n_folds) API took the sample count
    # positionally; model_selection.KFold takes only n_splits.
    kf_n = model_selection.KFold(n_splits=n_splits)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
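Setting n_splits to the number of samples makes KFold behave as leave-one-out; scikit-learn also provides a dedicated splitter for this. A minimal sketch of the same idea with LeaveOneOut on toy data (the data and the Ridge choice are illustrative):

import numpy as np
from sklearn import linear_model, model_selection

X = np.arange(20, dtype=float).reshape(10, 2)
y = X.sum(axis=1)

clf = linear_model.Ridge(alpha=1.0)
loo = model_selection.LeaveOneOut()  # same folds as KFold(n_splits=len(X))
y_pred = model_selection.cross_val_predict(clf, X, y, cv=loo)
print(np.round(y_pred, 2))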
# gs_numpy is identical to the version shown above.
def cv_SVR(xM, yV, svr_params, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross-validation with SVR: prediction output is generated for all input molecules.
    (Same as the version above, but plotting goes through kutil instead of jutil.)
    """
    print(xM.shape, yV.shape)
    clf = svm.SVR(**svr_params)
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    # kf_n_c (not the undefined kf5_ext_c) provides the fold iterator.
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)
    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def gs_param(model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1, graph=False):
    """
    gs = gs_param(model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1)

    Inputs
    ======
    model : e.g. svm.SVC() or linear_model.LinearRegression()
    param_grid : e.g. {"C": np.logspace(-2, 2, 5)}
    """
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    # return_train_score=True is needed for mean_train_score to appear in
    # cv_results_ (it defaults to False in recent scikit-learn).
    gs = model_selection.GridSearchCV(model, param_grid, cv=kf5_c, n_jobs=n_jobs,
                                      return_train_score=True)
    gs.fit(X, y)
    if graph:
        plt.plot(gs.cv_results_["mean_train_score"], label='E[Train]')
        plt.plot(gs.cv_results_["mean_test_score"], label='E[Test]')
        plt.legend(loc=0)
        plt.grid()
    return gs
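A hypothetical end-to-end call of gs_param, sweeping the SVC regularization strength on the iris data (the dataset and grid are illustrative, not from the original project):

import numpy as np
from sklearn import svm, datasets

X, y = datasets.load_iris(return_X_y=True)
param_grid = {"C": np.logspace(-2, 2, 5)}
gs = gs_param(svm.SVC(), X, y, param_grid, n_splits=5, graph=False)
print('Best C:', gs.best_params_["C"], 'CV score:', round(gs.best_score_, 3))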