Python 函数 cross_val_score() 的实例源码

movies.py 文件源码 项目:ml-talks-duolingo 作者: burrsettles 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def experiment(model_class, vectorizer, xval):
    """Fit a model, dump its weights to a file and print cross-validated scores.

    ``X`` and ``y`` are not parameters; they are read from the enclosing
    module scope.

    Args:
        model_class: Despite the name, this is used as an estimator
            *instance*: ``fit`` is called on it directly and the name of its
            class labels the output file and the printed rows.
        vectorizer: Object whose ``inverse_transform`` maps the fitted
            ``coef_`` back to a mapping of feature name -> weight.
        xval: Passed through as the ``cv`` argument of ``cross_val_score``.

    Side effects:
        Writes ``weights.<ClassName>.txt`` in the current directory and
        prints a separator line followed by the mean r2 and MAE scores.
    """
    name = model_class.__class__.__name__
    model = model_class.fit(X, y)
    model_weights = vectorizer.inverse_transform(model.coef_)[0]
    with open('weights.%s.txt' % name, 'w') as f:
        f.write('%s\t%f\n' % ('(intercept)', model.intercept_))
        # Each item is a (feature, weight) pair that fills the two format slots.
        f.writelines('%s\t%f\n' % k for k in model_weights.items())
    r2_scores = cross_validation.cross_val_score(
        model, X, y, scoring='r2', cv=xval)
    mae_scores = cross_validation.cross_val_score(
        model, X, y, scoring='mean_absolute_error', cv=xval)
    # Single-argument parenthesised prints emit the same text on Python 2
    # and Python 3, unlike the bare print statement.
    print('-' * 80)
    print('r2\t%.4f\t%s' % (np.mean(r2_scores), name))
    print('mae\t%.4f\t%s' % (np.mean(mae_scores), name))
main.py 文件源码 项目:coursera-machine-learning-yandex 作者: dstarcev 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def calculate(X, y):
    """Pick the Minkowski power ``p`` that gives the best cross-validated score.

    Tries 200 evenly spaced values of ``p`` in [1, 10] for a 5-neighbour,
    distance-weighted ``KNeighborsRegressor`` and scores each one with a
    shuffled 5-fold split (fixed seed 42) using the ``mean_squared_error``
    scorer.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values; ``len(y)`` sizes the folds.

    Returns:
        Tuple ``(best_p, best_score)`` where ``best_score`` is the highest
        mean fold score seen and ``best_p`` the value that produced it.
    """
    best_p, best_score = 0, -float('inf')
    kf = KFold(len(y), n_folds=5, shuffle=True, random_state=42)
    for p in numpy.linspace(1, 10, num=200):
        knr = KNeighborsRegressor(n_neighbors=5, weights='distance', p=p)
        fold_scores = cross_val_score(
            knr, X, y, cv=kf, scoring='mean_squared_error')
        # Average over the folds: ranking candidates by their single best
        # fold rewards a lucky split rather than overall quality.
        score = numpy.mean(fold_scores)
        if score > best_score:
            best_score = score
            best_p = p

    return best_p, best_score
main.py 文件源码 项目:coursera-machine-learning-yandex 作者: dstarcev 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def calculate(X, y):
    """Find the neighbour count ``k`` in 1..50 with the best mean CV accuracy.

    Each ``KNeighborsClassifier`` is scored with a shuffled 5-fold split
    (fixed seed 42) using the ``accuracy`` scorer.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Class labels; ``len(y)`` sizes the folds.

    Returns:
        Tuple ``(best_k, best_score)``. Both stay ``0`` if no candidate
        scores strictly above zero.
    """
    # Size the folds from the labels that were actually passed in instead of
    # depending on an unrelated module-level name.
    kf = KFold(len(y), n_folds=5, shuffle=True, random_state=42)
    best_k, best_score = 0, 0
    for k in range(1, 51):
        knn = KNeighborsClassifier(n_neighbors=k)
        score = cross_val_score(knn, X, y, cv=kf, scoring='accuracy').mean()
        if score > best_score:
            best_score = score
            best_k = k
    return best_k, best_score
main.py 文件源码 项目:coursera-machine-learning-yandex 作者: dstarcev 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def calculate(X, y, threshold):
    """Return the smallest forest size whose mean CV r2 exceeds ``threshold``.

    Forests of 1..50 trees (``random_state=1``) are scored in increasing
    order with a shuffled 5-fold split (seed 1) using the ``r2`` scorer; the
    search stops at the first size that passes.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values; ``len(y)`` sizes the folds.
        threshold: Mean score that must be strictly exceeded.

    Returns:
        The first tree count whose mean score is above ``threshold``, or
        ``None`` when none of the 50 candidates reaches it.
    """
    kf = KFold(len(y), n_folds=5, random_state=1, shuffle=True)
    for t in range(1, 51):
        clf = RandomForestRegressor(n_estimators=t, random_state=1)
        score = np.mean(cross_val_score(clf, X, y, cv=kf, scoring='r2'))
        if score > threshold:
            return t
    # Make the "nothing passed" outcome explicit rather than falling off
    # the end of the function.
    return None
image-classification.py 文件源码 项目:Building-Machine-Learning-Systems-With-Python-Second-Edition 作者: PacktPublishing 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def accuracy(features, labels):
    """Return the mean leave-one-out cross-validation score.

    The estimator is a two-step pipeline: ``StandardScaler`` followed by
    ``LogisticRegression``, both with default settings.

    Args:
        features: Sample matrix; ``len(features)`` sets the number of
            leave-one-out rounds.
        labels: Targets aligned with ``features``.

    Returns:
        Mean of the per-round scores returned by ``cross_val_score``.
    """
    from sklearn import cross_validation
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # Logistic regression is picked because it trains quickly; any other
    # classifier can be dropped into the 'classifier' step.
    steps = [
        ('preproc', StandardScaler()),
        ('classifier', LogisticRegression()),
    ]
    pipeline = Pipeline(steps)
    leave_one_out = cross_validation.LeaveOneOut(len(features))
    per_round = cross_validation.cross_val_score(
        pipeline, features, labels, cv=leave_one_out)
    return per_round.mean()
__init__.py 文件源码 项目:mlprojects-py 作者: srinathperera 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def regression_with_GBR(X_train, y_train, X_test, y_test, parmsFromNormalization, params=None):
    """Train a ``GradientBoostingRegressor`` and return its test predictions.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Features to predict on.
        y_test: True targets for ``X_test``; forwarded to the reporting
            helpers only.
        parmsFromNormalization: Forwarded unchanged to
            ``print_regression_model_summary``.
        params: Keyword arguments for ``GradientBoostingRegressor``. When
            omitted (``None``) the historical defaults below are used.

    Returns:
        Predictions of the fitted model for ``X_test``.

    Side effects:
        Calls ``print_regression_model_summary`` and
        ``print_feature_importance`` with the results.
    """
    if params is None:
        # Built per call so the default is never a dict shared between calls.
        params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
                  'learning_rate': 0.01, 'loss': 'ls'}
    gfr = GradientBoostingRegressor(**params)
    gfr.fit(X_train, y_train)
    y_pred_gbr = gfr.predict(X_test)
    print_regression_model_summary("GBR", y_test, y_pred_gbr, parmsFromNormalization)
    print_feature_importance(X_test, y_test, gfr.feature_importances_)

    # Cross validation is left disabled (the author was unsure it makes
    # sense for regression).
    # http://scikit-learn.org/stable/modules/cross_validation.html
    #gfr = GradientBoostingRegressor(**params)
    #scores = cross_validation.cross_val_score(gfr, X_train, y_train, cv=5)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    return y_pred_gbr
training.py 文件源码 项目:static-gesture-recognition 作者: windmark 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def crossValidateModel(self):
    """Cross-validate a distance-weighted kNN classifier and print the scores.

    Data comes from ``loadData(self.featureFile)``, which yields the labels
    first and the inputs second. The classifier uses ``self.n_neighbors``
    neighbours and is evaluated with ``cv=5``.
    """
    fold_count = 5
    labels, samples = loadData(self.featureFile)

    classifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    fold_scores = cross_validation.cross_val_score(
        classifier, samples, labels, cv=fold_count)

    print("\n----- k-fold Cross Validation -----")
    print(fold_scores)
    print("Average: ", sum(fold_scores) / len(fold_scores))
optimizer.py 文件源码 项目:KDDCUP2016 作者: hugochan 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def cv(self, estimator_params):
    """Normalise a parameter dict, apply it to the estimator and evaluate it.

    ``estimator_params`` is modified in place, in this order:

    1. ``self.ptypes``: when it is the string ``'int'`` every value is cast
       with ``int``; otherwise it is treated as a mapping of key ->
       converter and each listed key is converted.
    2. ``self.pfixed``: each entry overwrites (or adds) the same key.
    3. ``self.plist``: for each key the current value is read as a 1-based
       index into the list of choices stored under that key.

    Any of the three attributes may be ``None`` to skip that step.

    Args:
        estimator_params: Mapping of parameter name -> raw value.

    Returns:
        Whatever ``self.estimator.evaluate(self.cv_params['X'])`` returns
        after ``set_params`` has been called with the processed values.
    """
    # Identity checks against None: `!= None` would go through a custom
    # __ne__ if one of these attributes ever defines it.
    if self.ptypes is not None:
        if self.ptypes == 'int':
            for key in estimator_params.keys():
                estimator_params[key] = int(estimator_params[key])
        else:
            for key in self.ptypes.keys():
                estimator_params[key] = self.ptypes[key](estimator_params[key])

    if self.pfixed is not None:
        for key in self.pfixed.keys():
            estimator_params[key] = self.pfixed[key]

    if self.plist is not None:
        for key in self.plist.keys():
            # The incoming value is a 1-based position in the choice list.
            estimator_params[key] = self.plist[key][int(estimator_params[key]) - 1]

    self.estimator.set_params(**estimator_params)
    v = self.estimator.evaluate(self.cv_params['X'])
    return v

#               self.cv_params['estimator'] = estim


#               cvscore = cross_val_score(**self.cv_params)
#               return numpy.mean(cvscore)

        # --------------------------------------------- // --------------------------------------------- #
test_models.py 文件源码 项目:pines 作者: dmitru 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def test_iris(self):
        """Check that the tree reaches a mean 10-fold score above 0.8 on iris.

        The score is printed before the assertion runs.
        """
        iris = load_iris()
        classifier = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(classifier, iris.data, iris.target, cv=10)
        score = np.mean(fold_scores)
        print('iris: tree_type: {}, score = {}'.format(self.tree_type, score))
        self.assertTrue(score > 0.8)
test_models.py 文件源码 项目:pines 作者: dmitru 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_breast_cancer(self):
        """Check the tree's mean 10-fold score on breast_cancer is above 0.8.

        The score is printed before the assertion runs.
        """
        cancer = load_breast_cancer()
        classifier = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(classifier, cancer.data, cancer.target, cv=10)
        score = np.mean(fold_scores)
        print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
        self.assertTrue(score > 0.8)
test_models.py 文件源码 项目:pines 作者: dmitru 项目源码 文件源码 阅读 46 收藏 0 点赞 0 评论 0
def test_iris(self):
        """Check that the tree reaches a mean 10-fold score above 0.8 on iris.

        The score is printed only after the assertion has passed.
        """
        iris = load_iris()
        classifier = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(classifier, iris.data, iris.target, cv=10)
        score = np.mean(fold_scores)
        self.assertTrue(score > 0.8)
        print('iris: tree_type: {}, score = {}'.format(self.tree_type, score))
test_models.py 文件源码 项目:pines 作者: dmitru 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_breast_cancer(self):
        """Check the tree's mean 10-fold score on breast_cancer is above 0.8.

        The score is printed only after the assertion has passed.
        """
        cancer = load_breast_cancer()
        classifier = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(classifier, cancer.data, cancer.target, cv=10)
        score = np.mean(fold_scores)
        self.assertTrue(score > 0.8)
        print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
train.py 文件源码 项目:digit-ocr 作者: Nozdi 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def cv(model, X, y, n_iter=5, test_size=0.3):
    """Score ``model`` by accuracy over random shuffle splits.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Samples; ``len(X)`` sizes the splitter.
        y: Targets aligned with ``X``.
        n_iter: Number of shuffle/split rounds.
        test_size: Forwarded to ``ShuffleSplit`` as ``test_size``.

    Returns:
        The scores returned by ``cross_val_score`` (run with ``n_jobs=-1``).
    """
    sample_count = len(X)
    splitter = cross_validation.ShuffleSplit(
        sample_count,
        n_iter=n_iter,
        test_size=test_size,
    )
    scores = cross_validation.cross_val_score(
        model, X, y, cv=splitter, scoring='accuracy', n_jobs=-1)
    return scores
util.py 文件源码 项目:sentiment-analysis 作者: saber1988 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def random_forest_classify(my_train_data, my_train_label, my_test_data, estimators):
    """Cross-validate a random forest, then fit it and save test predictions.

    Args:
        my_train_data: Training samples.
        my_train_label: Training labels.
        my_test_data: Samples to predict after fitting on the full train set.
        estimators: Number of trees; also embedded in the output file name.

    Side effects:
        Prints the mean 5-fold score with a +/- two-standard-deviation band
        and passes the predictions to ``save_data`` under
        ``random_forest_<estimators>.csv``.
    """
    forest = RandomForestClassifier(n_estimators=estimators)
    cv_scores = cross_validation.cross_val_score(
        forest, my_train_data, my_train_label, cv=5)
    summary = "random forest(%d) accuracy: %0.3f (+/- %0.3f)" % (
        estimators, cv_scores.mean(), cv_scores.std() * 2)
    print(summary)

    # The cross-validation above scores the model; the saved predictions
    # come from a fit on the whole training set.
    forest.fit(my_train_data, my_train_label)
    predictions = forest.predict(my_test_data)
    save_data(predictions, "random_forest_%d.csv" % estimators)
util.py 文件源码 项目:sentiment-analysis 作者: saber1988 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def gradient_boosting_classify(my_train_data, my_train_label, my_test_data, estimators):
    """Cross-validate gradient boosting, then fit it and save test predictions.

    Args:
        my_train_data: Training samples.
        my_train_label: Training labels.
        my_test_data: Samples to predict after fitting on the full train set.
        estimators: Number of boosting stages; also embedded in the output
            file name.

    Side effects:
        Prints the mean 5-fold score with a +/- two-standard-deviation band
        and passes the predictions to ``save_data`` under
        ``gradient_boosting_<estimators>.csv``.
    """
    booster = GradientBoostingClassifier(n_estimators=estimators)
    cv_scores = cross_validation.cross_val_score(
        booster, my_train_data, my_train_label, cv=5)
    summary = "gradient boosting(%d) accuracy: %0.3f (+/- %0.3f)" % (
        estimators, cv_scores.mean(), cv_scores.std() * 2)
    print(summary)

    # The cross-validation above scores the model; the saved predictions
    # come from a fit on the whole training set.
    booster.fit(my_train_data, my_train_label)
    predictions = booster.predict(my_test_data)
    save_data(predictions, "gradient_boosting_%d.csv" % estimators)
util.py 文件源码 项目:sentiment-analysis 作者: saber1988 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def svc_classify(my_train_data, my_train_label, my_test_data, svc_c):
    """Cross-validate an SVC, then fit it and save test predictions.

    Args:
        my_train_data: Training samples.
        my_train_label: Training labels.
        my_test_data: Samples to predict after fitting on the full train set.
        svc_c: Value for the ``C`` parameter; also embedded (one decimal) in
            the output file name.

    Side effects:
        Prints the mean 5-fold score with a +/- two-standard-deviation band
        and passes the predictions to ``save_data`` under
        ``svc_<svc_c>.csv``.
    """
    # A polynomial kernel variant was tried and left disabled:
    # svm.SVC(C=svc_c, kernel='poly')
    classifier = svm.SVC(C=svc_c)
    cv_scores = cross_validation.cross_val_score(
        classifier, my_train_data, my_train_label, cv=5)
    summary = "svc(C=%.1f) accuracy: %0.3f (+/- %0.3f)" % (
        svc_c, cv_scores.mean(), cv_scores.std() * 2)
    print(summary)

    # The cross-validation above scores the model; the saved predictions
    # come from a fit on the whole training set.
    classifier.fit(my_train_data, my_train_label)
    predictions = classifier.predict(my_test_data)
    save_data(predictions, "svc_%.1f.csv" % svc_c)
MLNPCapstone.py 文件源码 项目:machine-learning-nanodegree-program-capstone 作者: harrylippy 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def cross_validate(self):
        """Cross-validate the currently selected learner and print its scores.

        The classifier is looked up as ``self._clf[self._learner]`` and the
        training data is unpacked from ``self._train_data`` as
        ``(X_train, y_train)``. Scoring uses ``make_scorer(roc_auc_score)``
        with ``cv=3``.

        Side effects:
            Writes a progress message to stdout before the run and the
            resulting scores after it, on the same line.
        """
        clf = self._clf[self._learner]
        (X_train, y_train) = self._train_data

        # Write the progress text without a newline so "done." lands on the
        # same line; the trailing space reproduces the separator that the
        # Python 2 `print ...,` form used to insert.
        stdout.write(" + Cross-validating classifier (learner = %s)... "
                     % self._learner)
        stdout.flush()
        scores = cross_val_score(
            clf,
            X_train, y_train,
            scoring=make_scorer(roc_auc_score),
            cv=3)
        print("done.\n   * Scores: %r" % scores)
Models.py 文件源码 项目:Stock-Prediction-Time-Series-Analysis-Python 作者: Nekooeimehr 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def First_Model_SVR(Scaled_Input_Data, Output_Data):
    """Tune an RBF SVR by grid search, then score it with leave-one-out CV.

    ``C`` and ``gamma`` are searched with 5-fold ``GridSearchCV`` using the
    ``mean_absolute_error`` scorer. A fresh ``SVR`` built from the best
    parameters is then evaluated with leave-one-out cross validation and
    the same scorer.

    Args:
        Scaled_Input_Data: Feature rows; ``len()`` of it sets the number of
            leave-one-out rounds.
        Output_Data: Targets aligned with the inputs.

    Returns:
        Tuple ``(mean_score, fitted_grid_search)``. ``mean_score`` is the
        raw mean of the values returned by ``cross_val_score``; the printed
        figure is that mean multiplied by -1.
        NOTE(review): the sign flip presumably undoes a negated-error
        scorer - confirm against the installed scikit-learn version.

    Side effects:
        Prints the elapsed time (covering the grid search and construction
        of the final SVR, not the leave-one-out run) and the average error.
    """
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"C": [1e-2, 1e-1, 1e0, 1e1, 1e2], "gamma": np.logspace(-4, 2, 6)}
    svr_Tuned = GridSearchCV(SVR(kernel='rbf', gamma=0.1, tol=0.005), cv=5,
                             param_grid=Grid_Dict, scoring="mean_absolute_error")
    svr_Tuned.fit(Scaled_Input_Data, Output_Data)
    best = svr_Tuned.best_params_
    SVR_Best = SVR(kernel='rbf', C=best['C'], gamma=best['gamma'], tol=0.01)
    SVR_Time = time.time() - T0
    print('The computational time of Radial based Support Vector Regression for ', n, ' examples is: ', SVR_Time)
    MAEs_SVR = cross_validation.cross_val_score(
        SVR_Best, Scaled_Input_Data, Output_Data,
        cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMAE_SVR = np.mean(MAEs_SVR)
    # The scorer is mean absolute error, so the label says MAE, not MSE.
    print('The average MAE of Radial based Support Vector Regression for ', n, ' examples is: ', (-1*MeanMAE_SVR))
    return (MeanMAE_SVR, svr_Tuned)
Models.py 文件源码 项目:Stock-Prediction-Time-Series-Analysis-Python 作者: Nekooeimehr 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def RF_Model(Scaled_Input_Data, Output_Data):
    """Fit a default random forest regressor and score it with leave-one-out CV.

    Args:
        Scaled_Input_Data: Feature rows; ``len()`` of it sets the number of
            leave-one-out rounds.
        Output_Data: Targets aligned with the inputs.

    Returns:
        Tuple ``(mean_score, fitted_model)``. ``mean_score`` is the raw
        mean of the values returned by ``cross_val_score`` with the
        ``mean_absolute_error`` scorer; the printed figure is that mean
        multiplied by -1.
        NOTE(review): the sign flip presumably undoes a negated-error
        scorer - confirm against the installed scikit-learn version.

    Side effects:
        Prints the time taken by the initial fit (the leave-one-out run is
        not included) and the average error.
    """
    T0 = time.time()
    n = len(Scaled_Input_Data)
    RFModel = RandomForestRegressor()
    RFModel.fit(Scaled_Input_Data, Output_Data)
    RF_Time = time.time() - T0
    print('The computational time of Random Forest Regression for ', n, ' examples is: ', RF_Time)
    MAEs_RF = cross_validation.cross_val_score(
        RFModel, Scaled_Input_Data, Output_Data,
        cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMAE_RF = np.mean(MAEs_RF)
    # The scorer is mean absolute error, so the label says MAE, not MSE.
    print('The average MAE of Random Forest Regression for ', n, ' examples is: ', (-1*MeanMAE_RF))
    return (MeanMAE_RF, RFModel)
fp1_double_neural_hypopt_rxn_predict.py 文件源码 项目:neural_reaction_fingerprint 作者: jnwei 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def hyperopt_train_test(params):
    """Build an estimator from ``params`` and return its mean 3-fold CV score.

    ``X``, ``y`` and ``other_param_dict`` are read from the enclosing module
    scope.

    Args:
        params: Indexable with at least three entries. The first two are
            converted to ``np.float32`` and the third to ``int`` before
            being passed positionally to ``rxn_estimator``.

    Returns:
        Mean of the scores returned by ``cross_val_score`` with ``cv=3``.
    """
    # The builtin int is what the np.int alias resolved to; the alias itself
    # no longer exists in current NumPy releases.
    clf = rxn_estimator(np.float32(params[0]), np.float32(params[1]),
                        int(params[2]), other_param_dict)
    return cross_val_score(clf, X, y, cv=3).mean()


问题


面经


文章

微信
公众号

扫码关注公众号