python类cross_val_score()的实例源码

Random_forest.py 文件源码 项目:Machine-Learning-Tools-on-Iris-Dataset 作者: debjitpaul 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def perform_random_forest(self, X_train_std, y_train, X_test_std, y_test):
    """Fit a random forest, print its accuracy and plot its decision regions.

    Args:
        X_train_std: Training features. The decision surface below is built
            from a two-column grid, so this assumes exactly two features
            per sample -- TODO confirm against the caller.
        y_train: Training labels.
        X_test_std: Test features, indexed as ``X[:, 0]`` / ``X[:, 1]``.
        y_test: Test labels; at most five distinct classes can be drawn
            because only five markers/colors are defined.

    Returns:
        None. The accuracies are printed and the figure is shown.
    """
    rfc = RandomForestClassifier(n_estimators=10, max_depth=None,
                                 min_samples_split=2, random_state=0)

    # Fit the random forest on the training data.
    rfc.fit(X_train_std, y_train)

    # Cross-validated accuracy over the training set.
    train_score = cross_val_score(rfc, X_train_std, y_train)
    print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))

    # Score the model fitted above on the held-out test set. Running
    # cross_val_score on the test data would instead refit fresh clones on
    # test folds and never evaluate the model trained on the training data.
    test_score = rfc.score(X_test_std, y_test)
    print('The test accuracy is {:.2f}%'.format(test_score*100))

    X = X_test_std
    y = y_test
    resolution = 0.01
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'green', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y_test))])

    # Plot the decision surface over a grid padded by 1 around the test data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    Z = rfc.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Overlay the test samples, one marker/color per class.
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
    plt.show()
feature_engineering.py 文件源码 项目:LSAT 作者: BillVanderLugt 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def CV_eval(model, X, y):
    """Run 8-fold cross-validation and report the per-fold scores.

    Args:
        model: Estimator passed to ``cross_val_score``.
        X: Feature data.
        y: Target data.

    Returns:
        The mean of the cross-validation scores.
    """
    fold_scores = cross_val_score(model, X, y, cv=8)
    pprint(fold_scores)

    mean_score = fold_scores.mean()
    spread = fold_scores.std() * 2  # two standard deviations
    print("Accuracy: %0.2f (+/- %0.2f)" % (mean_score, spread))
    return mean_score
handle_missing.py 文件源码 项目:playground 作者: Pennsy 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def knn(data, predict=False, best_n=None):
    """Build a KNN classifier, or rank candidate neighbour counts.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.
        predict: Unused by this function.
        best_n: When truthy, the neighbour count for the returned classifier.

    Returns:
        An unfitted ``KNeighborsClassifier(n_neighbors=best_n)`` when
        ``best_n`` is truthy. Otherwise ``None``, after printing
        ``(n_neighbors, mean 5-fold score)`` pairs sorted best-first for
        every neighbour count from 4 to 50.
    """
    if not best_n:
        ranking = []
        for k in range(4, 51):
            candidate = KNeighborsClassifier(n_neighbors=k)
            fold_scores = cross_val_score(candidate, data.X_train,
                                          data.y_train, cv=5)
            ranking.append((k, fold_scores.mean()))
        # Highest mean score first.
        ranking.sort(key=lambda pair: pair[1], reverse=True)
        print(ranking)
        return None

    # A neighbour count was supplied: just build the classifier.
    return KNeighborsClassifier(n_neighbors=best_n)
marketing_predict.py 文件源码 项目:playground 作者: Pennsy 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def svm_clf(data):
    """Print 10-fold CV scores of a linear SVM five times and return the model.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.

    Returns:
        The ``svm.LinearSVC(C=1)`` instance that was handed to
        ``cross_val_score``.
    """
    clf = svm.LinearSVC(C=1)
    for i in range(5):
        fold_scores = cross_val_score(clf, data.X_train, data.y_train, cv=10)
        print("iteration",i, "svm mean:", fold_scores.mean())
        print("svm train scores:\n", list(fold_scores))
    return clf


# use knn to impute missing values
marketing_predict.py 文件源码 项目:playground 作者: Pennsy 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def knn(data, predict=False):
    """Print 10-fold CV scores of a 3-nearest-neighbour classifier.

    The evaluation is repeated five times and each round prints the mean
    score followed by the per-fold scores.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.
        predict: Unused by this function.

    Returns:
        A new, unfitted ``KNeighborsClassifier`` with ``n_neighbors=3``.
    """
    n_neighbors = 3
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    for _ in range(5):
        scores = cross_val_score(clf, data.X_train, data.y_train, cv=10)
        # These labels used to read "svm" (copied from svm_clf), which
        # mislabelled the KNN results in the output.
        print("knn mean:", scores.mean())
        print("knn train scores:\n", list(scores))

    # Hand back a fresh classifier configured with the evaluated setting.
    return KNeighborsClassifier(n_neighbors=n_neighbors)
sklearn-LR_prov.py 文件源码 项目:forward 作者: yajun0601 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def regression(filename):
    """Fit a linear regression on one data file and return its coefficients.

    The data is loaded with ``loadDataSet``, split into train/test parts,
    and a ``LinearRegression`` is fitted on the training part. MAE, MSE and
    RMSE on the test part are printed, followed by the mean and spread of a
    5-fold cross-validation over the full data.

    Args:
        filename: Passed to ``loadDataSet`` and used as the row label of the
            returned frame.

    Returns:
        A ``pandas.DataFrame`` built from ``linreg.coef_`` with one column
        per entry of ``feature_cols`` and ``filename`` as its index.
        NOTE(review): this presumes ``coef_`` is 2-D (one row), i.e. that
        ``y`` is a column matrix -- confirm against ``loadDataSet``.
    """
    # sklearn.cross_validation was removed from scikit-learn; prefer the
    # model_selection module and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics

    print(filename)
    X, y = loadDataSet(filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

    # Column labels paired with the fitted coefficients.
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']
    y_pred = linreg.predict(X_test)

    # Compute the MSE once and reuse it for the RMSE.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:",mse)
    print('RMSE:',np.sqrt(mse))

    # NOTE: these are the estimator's default cross-validation scores; the
    # "Accuracy" label is kept as-is so existing output is unchanged.
    scores = cross_val_score(linreg, X, y,cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    res = pd.DataFrame(linreg.coef_,columns=feature_cols,index=[filename])
    return (res)

#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
lasso.py 文件源码 项目:forward 作者: yajun0601 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def regression(filename):
    """Fit a linear regression on one data file and return its coefficients.

    Prints the file name with the feature-matrix shape, the MAE / MSE / RMSE
    on a 25% hold-out split, and the scores of a 3-fold cross-validation
    over the full data.

    Args:
        filename: Passed to ``loadDataSet``; the part before the first ``.``
            labels the row of the returned frame.

    Returns:
        A single-row ``pandas.DataFrame`` holding the fitted coefficients
        for the columns named in ``feature_cols``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics

    X, y = loadDataSet(filename)
    print(filename,X.shape)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, test_size=0.25)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Column labels paired with the fitted coefficients.
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']

    predictions = model.predict(X_test)
    mae = metrics.mean_absolute_error(y_test, predictions)
    mse = metrics.mean_squared_error(y_test, predictions)
    print("MAE:",mae)
    print("MSE:",mse)
    print('RMSE:',np.sqrt(mse))

    cv_scores = cross_val_score(model, X, y,cv=3)
    print('scores:',cv_scores)  
    print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))

    # Keep only as many coefficient columns as there are labels.
    n_cols = len(feature_cols)
    coefficients = model.coef_.T[:n_cols].T
    row_label = filename.split('.')[0]
    return pd.DataFrame(coefficients, columns=feature_cols, index=[row_label])

#files = ['201603.xlsx','201604.xlsx','201605.xlsx','?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx','201703_06.xlsx']
#files = ['201703_06.xlsx']
SVM_Trainer.py 文件源码 项目:Spam-Message-Classifier-sklearn 作者: ZPdesu 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cross_validation(self):
    """Print macro-F1 scores of ``self.clf`` over five shuffled splits.

    Uses ``ShuffleSplit`` with 5 splits, a 20% test share and a fixed
    ``random_state`` of 20, evaluating ``self.clf`` on
    ``self.training_data`` / ``self.training_target``.

    Returns:
        None. The per-split scores and a mean/spread summary are printed.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # former ``print scores`` statement is a syntax error on Python 3.
    print(scores)
    # NOTE: the values are macro-F1 scores; the "Accuracy" label is kept
    # unchanged for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
SVM_Trainer.py 文件源码 项目:Spam-Message-Classifier-sklearn 作者: ZPdesu 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def cross_validation(self):
    """Print macro-F1 scores of ``self.clf`` over five shuffled splits.

    Uses ``ShuffleSplit`` with 5 splits, a 20% test share and a fixed
    ``random_state`` of 20, evaluating ``self.clf`` on
    ``self.training_data`` / ``self.training_target``.

    Returns:
        None. The per-split scores and a mean/spread summary are printed.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # former ``print scores`` statement is a syntax error on Python 3.
    print(scores)
    # NOTE: the values are macro-F1 scores; the "Accuracy" label is kept
    # unchanged for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
bayes_Trainer.py 文件源码 项目:Spam-Message-Classifier-sklearn 作者: ZPdesu 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cross_validation(self):
    """Print macro-F1 scores of ``self.clf`` over five shuffled splits.

    Uses ``ShuffleSplit`` with 5 splits, a 20% test share and a fixed
    ``random_state`` of 20, evaluating ``self.clf`` on
    ``self.training_data`` / ``self.training_target``.

    Returns:
        None. The per-split scores and a mean/spread summary are printed.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # former ``print scores`` statement is a syntax error on Python 3.
    print(scores)
    # NOTE: the values are macro-F1 scores; the "Accuracy" label is kept
    # unchanged for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
motor_imagery.py 文件源码 项目:moabb 作者: NeuroTechX 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def score(self, clf, X, y, groups, n_jobs=1):
    """Return the mean cross-validated accuracy of ``clf``.

    Args:
        clf: Estimator to evaluate.
        X: Feature data.
        y: Labels.
        groups: Group label per sample; also forwarded to ``cross_val_score``.
        n_jobs: Forwarded to ``cross_val_score``.

    Returns:
        The mean of the per-fold ``'accuracy'`` scores.
    """
    # More than one distinct group: leave one group out per fold.
    # Otherwise fall back to a shuffled 5-fold split with a fixed seed.
    has_several_groups = len(np.unique(groups)) > 1
    if has_several_groups:
        splitter = LeaveOneGroupOut()
    else:
        splitter = KFold(5, shuffle=True, random_state=45)

    fold_scores = cross_val_score(clf, X, y, groups=groups, cv=splitter,
                                  scoring='accuracy', n_jobs=n_jobs)
    return fold_scores.mean()
motor_imagery.py 文件源码 项目:moabb 作者: NeuroTechX 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def score(self, clf, X, y, groups, n_jobs=1):
    """Return the mean cross-validated ROC AUC of ``clf``.

    Args:
        clf: Estimator to evaluate.
        X: Feature data.
        y: Labels.
        groups: Group label per sample; also forwarded to ``cross_val_score``.
        n_jobs: Forwarded to ``cross_val_score``.

    Returns:
        The mean of the per-fold ``'roc_auc'`` scores.
    """
    # More than one distinct group: leave one group out per fold.
    # Otherwise fall back to a shuffled 5-fold split with a fixed seed.
    has_several_groups = len(np.unique(groups)) > 1
    if has_several_groups:
        splitter = LeaveOneGroupOut()
    else:
        splitter = KFold(5, shuffle=True, random_state=45)

    fold_aucs = cross_val_score(clf, X, y, groups=groups, cv=splitter,
                                scoring='roc_auc', n_jobs=n_jobs)
    return fold_aucs.mean()
lasso_regression.py 文件源码 项目:House-Pricing 作者: playing-kaggle 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def rmse_cv(model, X , y):
    """Return the per-fold RMSE of ``model`` from 5-fold cross-validation.

    The scorer yields negated mean squared errors, so each value is negated
    again before the square root is taken.
    """
    neg_mse_per_fold = cross_val_score(
        model, X, y, scoring="neg_mean_squared_error", cv=5)
    return np.sqrt(-neg_mse_per_fold)


#%%
linear_model.py 文件源码 项目:House-Pricing 作者: playing-kaggle 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def rmse_cv(model, X, Y):
    """Return the square root of the negated 10-fold CV scores of ``model``.

    Scoring uses the module-level ``scorer``; presumably it yields negated
    squared errors, which is why the values are negated before the square
    root -- verify against its definition.
    """
    fold_scores = cross_val_score(model, X, Y, scoring=scorer, cv=10)
    return np.sqrt(-fold_scores)
tbs_ml.py 文件源码 项目:eezzy 作者: 3Blades 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def check_model(model, splits, X, y):
    """Return the average negated mean absolute error of ``model``.

    Args:
        model: Estimator to evaluate.
        splits: Passed as ``cv`` to ``cross_val_score``.
        X: Feature data.
        y: Target data.

    Returns:
        The arithmetic mean of the ``'neg_mean_absolute_error'`` scores.
    """
    fold_scores = cross_val_score(
        model, X, y, scoring='neg_mean_absolute_error', cv=splits)
    total = sum(fold_scores)
    return total / len(fold_scores)
Stock_Prediction_Model_Random_Forrest.py 文件源码 项目:StockRecommendSystem 作者: doncat99 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def perform_CV(self, X_train, y_train, number_folds, n, m):
    """Return the mean CV score of a random forest with the given size.

    Args:
        X_train: Training features.
        y_train: Training labels.
        number_folds: Passed as ``cv`` to ``cross_val_score``.
        n: Number of trees (``n_estimators``).
        m: Maximum number of features (``max_features``).

    Returns:
        The mean of the cross-validation scores.
    """
    forest = RandomForestClassifier(
        n_estimators=n,
        max_features=m,
        n_jobs=8,
        verbose=self.paras.verbose,
    )
    fold_scores = cross_val_score(forest, X_train, y_train, cv=number_folds)
    return np.mean(fold_scores)

    # MODEL SELECTION : Find best parameters ######################################
    ## Inputs :  X_train, y_train, number of folds, range of number of trees, range of max of features
    ## Outputs : optimal number of trees, optimal max of features, accuracy
personality_classifier.py 文件源码 项目:personality 作者: nlp-psych 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def predict_trait(X, Y):
    """Return the mean 10-fold CV accuracy of a default ``svm.SVC`` on X, Y."""
    classifier = svm.SVC()
    fold_accuracies = cross_val_score(classifier, X, Y,
                                      scoring='accuracy', cv=10)
    return fold_accuracies.mean()
situacao_do_cliente_kfold.py 文件源码 项目:machine-learning 作者: guilhermesilveira 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Print and return the mean 10-fold CV score of ``modelo``.

    Args:
        nome: Label used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training features.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the cross-validation scores.
    """
    fold_scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv=10)
    taxa_de_acerto = np.mean(fold_scores)
    print("Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto))
    return taxa_de_acerto
situacao_do_cliente_kfold.py 文件源码 项目:machine-learning 作者: guilhermesilveira 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Print and return the mean 10-fold CV score of ``modelo``.

    Args:
        nome: Label used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training features.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the cross-validation scores.
    """
    fold_scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv=10)
    taxa_de_acerto = np.mean(fold_scores)
    print("Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto))
    return taxa_de_acerto
classificando_emails.py 文件源码 项目:machine-learning 作者: guilhermesilveira 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Print and return the mean 10-fold CV score of ``modelo``.

    Args:
        nome: Label used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training features.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the cross-validation scores.
    """
    fold_scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv=10)
    taxa_de_acerto = np.mean(fold_scores)
    print("Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto))
    return taxa_de_acerto


问题


面经


文章

微信
公众号

扫码关注公众号