def perform_random_forest(self, X_train_std, y_train, X_test_std, y_test):
    """Fit a random forest, report its accuracy and plot its decision regions.

    A 10-tree ``RandomForestClassifier`` is fitted on the training data. Two
    figures are printed: the mean cross-validated score on the training set
    and the accuracy of the fitted model on the held-out test set. The
    decision surface is then drawn over the first two columns of
    ``X_test_std`` together with the test samples.

    Args:
        X_train_std: Training feature matrix.
        y_train: Training labels.
        X_test_std: Test feature matrix. It is indexed as ``X[:, 0]`` and
            ``X[:, 1]`` for plotting and the forest predicts on a 2-column
            grid, so the plot only works for a model trained on exactly two
            features.
        y_test: Test labels. At most five distinct classes can be drawn,
            because only five markers and five colours are defined.

    Returns:
        None. Results are printed and the figure is shown with ``plt.show()``.
    """
    rfc = RandomForestClassifier(n_estimators=10, max_depth=None,
                                 min_samples_split=2, random_state=0)
    rfc.fit(X_train_std, y_train)

    # Cross-validated estimate computed on folds of the training set.
    train_score = cross_val_score(rfc, X_train_std, y_train)
    print('The training accuracy is {:.2f}%'.format(train_score.mean() * 100))

    # Score the model fitted above on the held-out data. Cross-validating on
    # the test set would refit clones on test folds and never evaluate the
    # model that was actually trained.
    test_score = rfc.score(X_test_std, y_test)
    print('The test accuracy is {:.2f}%'.format(test_score * 100))

    X = X_test_std
    y = y_test
    resolution = 0.01
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'green', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # Plot the decision surface on a dense grid padded by 1 on every side.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = rfc.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Overlay the test samples, one marker/colour per class.
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
    plt.show()
python类cross_val_score()的实例源码
Random_forest.py 文件源码
项目:Machine-Learning-Tools-on-Iris-Dataset
作者: debjitpaul
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def CV_eval(model, X, y):
    """Score ``model`` with 8-fold cross-validation and report the results.

    The per-fold scores are pretty-printed, followed by a summary line with
    their mean and twice their standard deviation.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Feature data.
        y: Target data.

    Returns:
        The mean of the per-fold scores.
    """
    fold_scores = cross_val_score(model, X, y, cv=8)
    pprint(fold_scores)
    mean_score = fold_scores.mean()
    spread = fold_scores.std() * 2
    print("Accuracy: %0.2f (+/- %0.2f)" % (mean_score, spread))
    return mean_score
def knn(data, predict=False, best_n=None):
    """Build a k-NN classifier, or search for a good neighbour count.

    With a truthy ``best_n`` an unfitted ``KNeighborsClassifier`` using that
    many neighbours is returned immediately. Otherwise every neighbour count
    from 4 to 50 is scored with 5-fold cross-validation on ``data.X_train`` /
    ``data.y_train`` and the ``(n_neighbors, mean score)`` pairs are printed,
    best first; nothing is returned in that case.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.
        predict: Unused.
        best_n: Neighbour count to use; a falsy value triggers the search.
    """
    if best_n:
        return KNeighborsClassifier(n_neighbors=best_n)

    ranking = []
    for k in range(4, 51):
        candidate = KNeighborsClassifier(n_neighbors=k)
        fold_scores = cross_val_score(candidate, data.X_train, data.y_train,
                                      cv=5)
        ranking.append((k, fold_scores.mean()))

    # Highest mean score first.
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    print(ranking)
def svm_clf(data):
    """Cross-validate a linear SVM five times and return the classifier.

    Each round runs 10-fold cross-validation of ``LinearSVC(C=1)`` on
    ``data.X_train`` / ``data.y_train`` and prints the mean score followed by
    the list of per-fold scores.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.

    Returns:
        The ``LinearSVC`` instance that was passed to ``cross_val_score``.
    """
    classifier = svm.LinearSVC(C=1)
    for round_no in range(5):
        fold_scores = cross_val_score(classifier, data.X_train, data.y_train,
                                      cv=10)
        print("iteration", round_no, "svm mean:", fold_scores.mean())
        print("svm train scores:\n", list(fold_scores))
    return classifier
# use knn to impute missing values
def knn(data, predict=False):
    """Cross-validate a 3-nearest-neighbour classifier and return a fresh one.

    Runs 10-fold cross-validation on ``data.X_train`` / ``data.y_train`` five
    times, printing the mean score and the per-fold scores each time, then
    returns a new, unfitted ``KNeighborsClassifier`` with the same neighbour
    count.

    Args:
        data: Object exposing ``X_train`` and ``y_train``.
        predict: Unused.

    Returns:
        An unfitted ``KNeighborsClassifier(n_neighbors=3)``.
    """
    n_neighbors = 3
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    for _ in range(5):
        scores = cross_val_score(clf, data.X_train, data.y_train, cv=10)
        # These labels previously said "svm" although this evaluates k-NN.
        print("knn mean:", scores.mean())
        print("knn train scores:\n", list(scores))

    # Hand back a classifier for prediction using the neighbour count above.
    return KNeighborsClassifier(n_neighbors=n_neighbors)
def regression(filename):
    """Fit a linear regression on the data set in ``filename`` and report it.

    The data returned by ``loadDataSet(filename)`` is split into train and
    test parts (``random_state=1``), a ``LinearRegression`` is fitted on the
    training part, and MAE, MSE and RMSE on the test part are printed. A
    5-fold cross-validation summary over the whole data set is printed too.

    Args:
        filename: Passed to ``loadDataSet`` and used as the row label of the
            returned frame.

    Returns:
        A one-row ``pandas.DataFrame`` holding the fitted coefficients, with
        one column per entry in ``feature_cols``.
    """
    # ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and
    # later removed; prefer ``model_selection`` and fall back for old installs.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics

    print(filename)
    X, y = loadDataSet(filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

    # Column names paired with the coefficients in the returned frame.
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']

    y_pred = linreg.predict(X_test)
    mse = metrics.mean_squared_error(y_test, y_pred)
    print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:",mse)
    print('RMSE:',np.sqrt(mse))

    # With no explicit ``scoring`` the estimator's own ``score`` is used, so
    # for a regressor the value labelled "Accuracy" below is not a
    # classification accuracy. The label is kept for output compatibility.
    scores = cross_val_score(linreg, X, y,cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # NOTE(review): assumes ``linreg.coef_`` is 2-D with a single row (i.e.
    # ``y`` is 2-D) so it lines up with ``feature_cols`` — confirm against
    # ``loadDataSet``.
    res = pd.DataFrame(linreg.coef_,columns=feature_cols,index=[filename])
    return res
#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
def regression(filename):
    """Fit a linear regression on the data set in ``filename`` and report it.

    The data from ``loadDataSet(filename)`` is split 75/25 with
    ``random_state=1``; a ``LinearRegression`` is fitted on the training part
    and MAE, MSE and RMSE on the test part are printed, followed by the
    3-fold cross-validation scores over the full data set.

    Args:
        filename: Passed to ``loadDataSet``; the part before the first ``.``
            becomes the row label of the returned frame.

    Returns:
        A one-row ``pandas.DataFrame`` of the fitted coefficients, truncated
        to as many entries as there are names in ``feature_cols``.
    """
    from sklearn import metrics
    from sklearn.linear_model import LinearRegression

    X, y = loadDataSet(filename)
    print(filename, X.shape)

    split = train_test_split(X, y, random_state=1, test_size=0.25)
    X_train, X_test, y_train, y_test = split

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Column names paired with the coefficients in the returned frame.
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']

    predicted = model.predict(X_test)
    print("MAE:", metrics.mean_absolute_error(y_test, predicted))
    print("MSE:", metrics.mean_squared_error(y_test, predicted))
    print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predicted)))

    cv_scores = cross_val_score(model, X, y, cv=3)
    print('scores:', cv_scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))

    # Keep only as many coefficients as there are named features.
    n_named = len(feature_cols)
    coefficients = model.coef_.T[:n_named].T
    row_label = filename.split('.')[0]
    res = pd.DataFrame(coefficients, columns=feature_cols, index=[row_label])
    return res
#files = ['201603.xlsx','201604.xlsx','201605.xlsx','?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx','201703_06.xlsx']
#files = ['201703_06.xlsx']
def cross_validation(self):
    """Cross-validate ``self.clf`` on the training data and print the scores.

    Uses five shuffled splits with a 20% test share (``random_state=20``) and
    the ``f1_macro`` scorer on ``self.training_data`` /
    ``self.training_target``. Prints the per-split scores and a summary line
    with their mean and twice their standard deviation.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data,
                             self.training_target, cv=cv, scoring='f1_macro')
    # Call form instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3; the output is the same on both.
    print(scores)
    # The summary is labelled "Accuracy" although the scorer is macro F1;
    # the label is kept for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
def cross_validation(self):
    """Cross-validate ``self.clf`` on the training data and print the scores.

    Uses five shuffled splits with a 20% test share (``random_state=20``) and
    the ``f1_macro`` scorer on ``self.training_data`` /
    ``self.training_target``. Prints the per-split scores and a summary line
    with their mean and twice their standard deviation.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data,
                             self.training_target, cv=cv, scoring='f1_macro')
    # Call form instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3; the output is the same on both.
    print(scores)
    # The summary is labelled "Accuracy" although the scorer is macro F1;
    # the label is kept for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
def cross_validation(self):
    """Cross-validate ``self.clf`` on the training data and print the scores.

    Uses five shuffled splits with a 20% test share (``random_state=20``) and
    the ``f1_macro`` scorer on ``self.training_data`` /
    ``self.training_target``. Prints the per-split scores and a summary line
    with their mean and twice their standard deviation.
    """
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
    scores = cross_val_score(self.clf, self.training_data,
                             self.training_target, cv=cv, scoring='f1_macro')
    # Call form instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3; the output is the same on both.
    print(scores)
    # The summary is labelled "Accuracy" although the scorer is macro F1;
    # the label is kept for output compatibility.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
def score(self, clf, X, y, groups, n_jobs=1):
    """Return the mean cross-validated accuracy of ``clf``.

    When ``groups`` holds more than one distinct value the folds come from
    ``LeaveOneGroupOut``; otherwise a shuffled 5-fold ``KFold`` with
    ``random_state=45`` is used.

    Args:
        clf: Estimator to evaluate.
        X: Feature data.
        y: Target data.
        groups: Group label per sample, forwarded to ``cross_val_score``.
        n_jobs: Number of parallel jobs for ``cross_val_score``.

    Returns:
        The mean of the per-fold ``accuracy`` scores.
    """
    has_several_groups = len(np.unique(groups)) > 1
    splitter = (LeaveOneGroupOut() if has_several_groups
                else KFold(5, shuffle=True, random_state=45))
    fold_scores = cross_val_score(clf, X, y, groups=groups, cv=splitter,
                                  scoring='accuracy', n_jobs=n_jobs)
    return fold_scores.mean()
def score(self, clf, X, y, groups, n_jobs=1):
    """Return the mean cross-validated ROC AUC of ``clf``.

    When ``groups`` holds more than one distinct value the folds come from
    ``LeaveOneGroupOut``; otherwise a shuffled 5-fold ``KFold`` with
    ``random_state=45`` is used.

    Args:
        clf: Estimator to evaluate.
        X: Feature data.
        y: Target data.
        groups: Group label per sample, forwarded to ``cross_val_score``.
        n_jobs: Number of parallel jobs for ``cross_val_score``.

    Returns:
        The mean of the per-fold ``roc_auc`` scores.
    """
    distinct_groups = np.unique(groups)
    if len(distinct_groups) > 1:
        splitter = LeaveOneGroupOut()
    else:
        splitter = KFold(5, shuffle=True, random_state=45)
    fold_scores = cross_val_score(clf, X, y, groups=groups, cv=splitter,
                                  scoring='roc_auc', n_jobs=n_jobs)
    return fold_scores.mean()
def rmse_cv(model, X, y):
    """Return the RMSE of ``model`` for each fold of 5-fold cross-validation.

    The ``neg_mean_squared_error`` scores are negated and square-rooted.
    """
    neg_mse = cross_val_score(model, X, y, cv=5,
                              scoring="neg_mean_squared_error")
    return np.sqrt(-neg_mse)
#%%
def rmse_cv(model, X, Y):
    """Return the square root of the negated 10-fold CV scores of ``model``.

    Scoring uses the module-level ``scorer``.
    NOTE(review): presumably ``scorer`` yields negated squared errors, making
    the result an RMSE per fold — confirm where ``scorer`` is defined.
    """
    fold_scores = cross_val_score(model, X, Y, cv=10, scoring=scorer)
    return np.sqrt(-fold_scores)
def check_model(model, splits, X, y):
    """Return the average ``neg_mean_absolute_error`` score of ``model``.

    Args:
        model: Estimator to evaluate.
        splits: Value forwarded as ``cv`` to ``cross_val_score``.
        X: Feature data.
        y: Target data.

    Returns:
        The sum of the per-split scores divided by their count.
    """
    fold_scores = cross_val_score(model, X, y,
                                  scoring='neg_mean_absolute_error',
                                  cv=splits)
    total = sum(fold_scores)
    return total / len(fold_scores)
Stock_Prediction_Model_Random_Forrest.py 文件源码
项目:StockRecommendSystem
作者: doncat99
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def perform_CV(self, X_train, y_train, number_folds, n, m):
    """Return the mean cross-validated score of a random forest.

    Args:
        X_train: Training feature data.
        y_train: Training labels.
        number_folds: Value forwarded as ``cv`` to ``cross_val_score``.
        n: Number of trees (``n_estimators``).
        m: Value for ``max_features``.

    Returns:
        The mean of the per-fold scores.
    """
    forest = RandomForestClassifier(
        n_estimators=n,
        max_features=m,
        n_jobs=8,
        verbose=self.paras.verbose,
    )
    fold_scores = cross_val_score(forest, X_train, y_train, cv=number_folds)
    return np.mean(fold_scores)
# MODEL SELECTION : Find best parameters ######################################
## Inputs : X_train, y_train, number of folds, range of number of trees, range of max of features
## Outputs : optimal number of trees, optimal max of features, accuracy
def predict_trait(X, Y):
    """Return the mean 10-fold cross-validated accuracy of a default ``SVC``.

    Args:
        X: Feature data.
        Y: Target labels.
    """
    classifier = svm.SVC()
    fold_scores = cross_val_score(classifier, X, Y, cv=10, scoring='accuracy')
    return fold_scores.mean()
situacao_do_cliente_kfold.py 文件源码
项目:machine-learning
作者: guilhermesilveira
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Cross-validate ``modelo`` with 10 folds and report its mean score.

    Args:
        nome: Label for the model, used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training feature data.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the per-fold scores, which is also printed.
    """
    fold_scores = cross_val_score(modelo, treino_dados, treino_marcacoes,
                                  cv=10)
    mean_score = np.mean(fold_scores)
    print("Taxa de acerto do {0}: {1}".format(nome, mean_score))
    return mean_score
situacao_do_cliente_kfold.py 文件源码
项目:machine-learning
作者: guilhermesilveira
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Cross-validate ``modelo`` with 10 folds and report its mean score.

    Args:
        nome: Label for the model, used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training feature data.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the per-fold scores, which is also printed.
    """
    n_folds = 10
    mean_score = np.mean(
        cross_val_score(modelo, treino_dados, treino_marcacoes, cv=n_folds)
    )
    report = "Taxa de acerto do {0}: {1}".format(nome, mean_score)
    print(report)
    return mean_score
classificando_emails.py 文件源码
项目:machine-learning
作者: guilhermesilveira
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    """Cross-validate ``modelo`` with 10 folds and report its mean score.

    Args:
        nome: Label for the model, used in the printed message.
        modelo: Estimator to evaluate.
        treino_dados: Training feature data.
        treino_marcacoes: Training labels.

    Returns:
        The mean of the per-fold scores, which is also printed.
    """
    resultados = cross_val_score(modelo, treino_dados, treino_marcacoes,
                                 cv=10)
    media = np.mean(resultados)
    print("Taxa de acerto do {0}: {1}".format(nome, media))
    return media