def experiment(model_class, vectorizer, xval):
    # `model_class` is actually an estimator instance; grab its class name for the output file.
    name = model_class.__class__.__name__
    # X and y are module-level globals defined elsewhere in the original script.
    model = model_class.fit(X, y)
    model_weights = vectorizer.inverse_transform(model.coef_)[0]
    with open('weights.%s.txt' % name, 'w') as f:
        f.write('%s\t%f\n' % ('(intercept)', model.intercept_))
        f.writelines('%s\t%f\n' % k for k in model_weights.items())
    r2_scores = cross_validation.cross_val_score(model, X, y, scoring='r2', cv=xval)
    # Note: pre-0.18 scikit-learn returns 'mean_absolute_error' scores negated.
    mae_scores = cross_validation.cross_val_score(model, X, y, scoring='mean_absolute_error', cv=xval)
    print '-' * 80
    print 'r2\t%.4f\t%s' % (np.mean(r2_scores), name)
    print 'mae\t%.4f\t%s' % (np.mean(mae_scores), name)
Python cross_val_score() example source code
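All of the snippets below use the pre-0.18 scikit-learn API, in which cross_val_score lives in sklearn.cross_validation and error-based scorers such as 'mean_squared_error' and 'mean_absolute_error' return negated values (larger is better). A minimal sketch of the basic call under that assumption:

# Minimal cross_val_score sketch for pre-0.18 scikit-learn (an assumption;
# from 0.18 on the function lives in sklearn.model_selection and the error
# scorers are renamed with a 'neg_' prefix).
import numpy as np
from sklearn.cross_validation import KFold, cross_val_score
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
kf = KFold(len(iris.target), n_folds=5, shuffle=True, random_state=0)
scores = cross_val_score(KNeighborsClassifier(), iris.data, iris.target,
                         cv=kf, scoring='accuracy')
print(np.mean(scores))  # mean accuracy over the 5 folds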
def calculate(X, y):
    # Search for the best Minkowski power parameter p for a distance-weighted KNN regressor.
    best_p, best_score = 0, -float('inf')
    kf = KFold(len(y), n_folds=5, shuffle=True, random_state=42)
    for p in numpy.linspace(1, 10, num=200):
        knr = KNeighborsRegressor(n_neighbors=5, weights='distance', p=p)
        # 'mean_squared_error' scores are negated, so max() picks the best single fold
        # (the mean over folds would be the more conventional choice).
        score = max(cross_val_score(knr, X, y, cv=kf, scoring='mean_squared_error'))
        if score > best_score:
            best_score = score
            best_p = p
    return best_p, best_score
def calculate(X, y):
    # Search k in [1, 50] for the best cross-validated KNN classification accuracy.
    kf = KFold(len(y), n_folds=5, shuffle=True, random_state=42)  # was len(data): undefined name
    best_k, best_score = 0, 0
    for k in xrange(1, 51):
        knn = KNeighborsClassifier(n_neighbors=k)
        score = cross_val_score(knn, X, y, cv=kf, scoring='accuracy').mean()
        if score > best_score:
            best_score = score
            best_k = k
    return best_k, best_score
def calculate(X, y, threshold):
    # Return the smallest number of trees whose mean cross-validated R^2 exceeds `threshold`.
    kf = KFold(len(y), n_folds=5, random_state=1, shuffle=True)
    for t in xrange(1, 51):
        clf = RandomForestRegressor(n_estimators=t, random_state=1)
        score = np.mean(cross_val_score(clf, X, y, cv=kf, scoring='r2'))
        if score > threshold:
            return t
    return None  # no forest size in [1, 50] reached the threshold
image-classification.py — project: Building-Machine-Learning-Systems-With-Python-Second-Edition, author: PacktPublishing
def accuracy(features, labels):
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import cross_validation
    # We use logistic regression because it is very fast.
    # Feel free to experiment with other classifiers (see the sketch after this function).
    clf = Pipeline([('preproc', StandardScaler()),
                    ('classifier', LogisticRegression())])
    cv = cross_validation.LeaveOneOut(len(features))
    scores = cross_validation.cross_val_score(clf, features, labels, cv=cv)
    return scores.mean()
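As the comment suggests, other classifiers can be dropped into the same pipeline. A minimal sketch, assuming a linear SVM as the replacement (any scikit-learn estimator with fit/predict would do; the cross-validation code around it is unchanged):

# Hypothetical variant of the pipeline above with a linear SVM in place of
# logistic regression.
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

clf = Pipeline([('preproc', StandardScaler()),
                ('classifier', LinearSVC())])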
def regression_with_GBR(X_train, y_train, X_test, y_test, parmsFromNormalization,
                        params={'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
                                'learning_rate': 0.01, 'loss': 'ls'}):
    # GradientBoostingRegressor
    gfr = GradientBoostingRegressor(**params)
    gfr.fit(X_train, y_train)
    y_pred_gbr = gfr.predict(X_test)
    print_regression_model_summary("GBR", y_test, y_pred_gbr, parmsFromNormalization)
    print_feature_importance(X_test, y_test, gfr.feature_importances_)
    # cross-validation (not sure this makes sense for regression; see the sketch after this function)
    # http://scikit-learn.org/stable/modules/cross_validation.html
    # gfr = GradientBoostingRegressor(**params)
    # scores = cross_validation.cross_val_score(gfr, X_train, y_train, cv=5)
    # print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    return y_pred_gbr
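The commented-out lines question whether cross-validation makes sense here; it does for regression too, provided a regression scorer is used (the default for regressors is R^2, not accuracy, so the "Accuracy" label in the commented code was misleading). A minimal sketch under the same pre-0.18 API, assuming it is placed inside regression_with_GBR where X_train and y_train are in scope:

# Cross-validating the GBR with an explicit regression scorer (a sketch).
from sklearn import cross_validation
from sklearn.ensemble import GradientBoostingRegressor

gfr_cv = GradientBoostingRegressor(n_estimators=500, max_depth=4,
                                   learning_rate=0.01, loss='ls')
scores = cross_validation.cross_val_score(gfr_cv, X_train, y_train,
                                          cv=5, scoring='r2')
print("R^2: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))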
def crossValidateModel(self):
    (label_vector, input_vector) = loadData(self.featureFile)
    kFold = 5
    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    scores = cross_validation.cross_val_score(kNNClassifier, input_vector, label_vector, cv=kFold)
    print("\n----- k-fold Cross Validation -----")
    print(scores)
    print("Average: ", sum(scores) / len(scores))
def cv(self, estimator_params):
    # Coerce hyperparameters to their declared types before fitting.
    if self.ptypes is not None:
        if self.ptypes == 'int':
            for key in estimator_params.keys():
                estimator_params[key] = int(estimator_params[key])
        else:
            for key in self.ptypes.keys():
                estimator_params[key] = self.ptypes[key](estimator_params[key])
    # Overwrite with any fixed parameters.
    if self.pfixed is not None:
        for key in self.pfixed.keys():
            estimator_params[key] = self.pfixed[key]
    # Map 1-based indices to values for list-valued parameters.
    if self.plist is not None:
        for key in self.plist.keys():
            estimator_params[key] = self.plist[key][int(estimator_params[key]) - 1]
    self.estimator.set_params(**estimator_params)
    v = self.estimator.evaluate(self.cv_params['X'])
    return v
    # self.cv_params['estimator'] = estim
    # cvscore = cross_val_score(**self.cv_params)
    # return numpy.mean(cvscore)
# --------------------------------------------- // --------------------------------------------- #
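The commented-out lines above hint at the cross-validated objective the author apparently had in mind. A sketch of how that method body might look, assuming self.cv_params holds the remaining cross_val_score keyword arguments (X, y, cv, scoring):

# Hypothetical reconstruction of the commented-out cross-validated objective;
# assumes self.cv_params carries X, y, cv and scoring for cross_val_score.
import numpy
from sklearn.cross_validation import cross_val_score

def cv_with_score(self, estimator_params):
    self.estimator.set_params(**estimator_params)
    cv_params = dict(self.cv_params)
    cv_params['estimator'] = self.estimator
    return numpy.mean(cross_val_score(**cv_params))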
def test_iris(self):
    dataset = load_iris()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type), dataset.data, dataset.target, cv=10))
    print('iris: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)

def test_breast_cancer(self):
    dataset = load_breast_cancer()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type), dataset.data, dataset.target, cv=10))
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)
def test_iris(self):
    dataset = load_iris()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type), dataset.data, dataset.target, cv=10))
    # Print before asserting so the score is still reported when the test fails.
    print('iris: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)

def test_breast_cancer(self):
    dataset = load_breast_cancer()
    score = np.mean(cross_val_score(
        DecisionTreeClassifier(tree_type=self.tree_type), dataset.data, dataset.target, cv=10))
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)
def cv(model, X, y, n_iter=5, test_size=0.3):
    split = cross_validation.ShuffleSplit(
        len(X), n_iter=n_iter, test_size=test_size,
    )
    return cross_validation.cross_val_score(model, X, y, cv=split,
                                            scoring='accuracy', n_jobs=-1)
def random_forest_classify(my_train_data, my_train_label, my_test_data, estimators):
    clf = RandomForestClassifier(n_estimators=estimators)
    scores = cross_validation.cross_val_score(clf, my_train_data, my_train_label, cv=5)
    print("random forest(%d) accuracy: %0.3f (+/- %0.3f)" % (estimators, scores.mean(), scores.std() * 2))
    clf.fit(my_train_data, my_train_label)
    my_test_label = clf.predict(my_test_data)
    file_name = "random_forest_%d.csv" % estimators
    save_data(my_test_label, file_name)

def gradient_boosting_classify(my_train_data, my_train_label, my_test_data, estimators):
    clf = GradientBoostingClassifier(n_estimators=estimators)
    scores = cross_validation.cross_val_score(clf, my_train_data, my_train_label, cv=5)
    print("gradient boosting(%d) accuracy: %0.3f (+/- %0.3f)" % (estimators, scores.mean(), scores.std() * 2))
    clf.fit(my_train_data, my_train_label)
    my_test_label = clf.predict(my_test_data)
    file_name = "gradient_boosting_%d.csv" % estimators
    save_data(my_test_label, file_name)

def svc_classify(my_train_data, my_train_label, my_test_data, svc_c):
    # clf = svm.SVC(C=svc_c, kernel='poly')
    clf = svm.SVC(C=svc_c)
    scores = cross_validation.cross_val_score(clf, my_train_data, my_train_label, cv=5)
    print("svc(C=%.1f) accuracy: %0.3f (+/- %0.3f)" % (svc_c, scores.mean(), scores.std() * 2))
    clf.fit(my_train_data, my_train_label)
    my_test_label = clf.predict(my_test_data)
    file_name = "svc_%.1f.csv" % svc_c
    save_data(my_test_label, file_name)
MLNPCapstone.py — project: machine-learning-nanodegree-program-capstone, author: harrylippy
def cross_validate(self):
    clf = self._clf[self._learner]
    (X_train, y_train) = self._train_data
    print " + Cross-validating classifier (learner = %s)..." % self._learner,
    stdout.flush()
    scores = cross_val_score(
        clf,
        X_train, y_train,
        scoring=make_scorer(roc_auc_score),
        cv=3)
    print "done.\n * Scores: %r" % scores
Models.py — project: Stock-Prediction-Time-Series-Analysis-Python, author: Nekooeimehr
def First_Model_SVR(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"C": [1e-2, 1e-1, 1e0, 1e1, 1e2], "gamma": np.logspace(-4, 2, 6)}
    svr_Tuned = GridSearchCV(SVR(kernel='rbf', gamma=0.1, tol=0.005), cv=5,
                             param_grid=Grid_Dict, scoring="mean_absolute_error")
    svr_Tuned.fit(Scaled_Input_Data, Output_Data)
    SVR_MSE = SVR(kernel='rbf', C=svr_Tuned.best_params_['C'], gamma=svr_Tuned.best_params_['gamma'], tol=0.01)
    SVR_Time = time.time() - T0
    print('The computational time of radial basis SVR for ', n, ' examples is: ', SVR_Time)
    # Despite the *_MSE names, the scorer is mean absolute error; pre-0.18
    # scikit-learn returns it negated, hence the -1 factor below.
    MSEs_SVR = cross_validation.cross_val_score(SVR_MSE, Scaled_Input_Data, Output_Data,
                                                cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMSE_SVR = np.mean(list(MSEs_SVR))
    print('The average MAE of radial basis SVR for ', n, ' examples is: ', (-1 * MeanMSE_SVR))
    return(MeanMSE_SVR, svr_Tuned)
Models.py — project: Stock-Prediction-Time-Series-Analysis-Python, author: Nekooeimehr
def RF_Model(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    RFModel = RandomForestRegressor()
    RFModel.fit(Scaled_Input_Data, Output_Data)
    RF_Time = time.time() - T0
    print('The computational time of Random Forest Regression for ', n, ' examples is: ', RF_Time)
    # As above, the scorer is (negated) mean absolute error despite the *_MSE names.
    MSEs_RF = cross_validation.cross_val_score(RFModel, Scaled_Input_Data, Output_Data,
                                               cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMSE_RF = np.mean(list(MSEs_RF))
    print('The average MAE of Random Forest Regression for ', n, ' examples is: ', (-1 * MeanMSE_RF))
    return(MeanMSE_RF, RFModel)
fp1_double_neural_hypopt_rxn_predict.py — project: neural_reaction_fingerprint, author: jnwei
def hyperopt_train_test(params):
    # rxn_estimator, other_param_dict, X and y are module-level globals in the original script.
    clf = rxn_estimator(np.float32(params[0]), np.float32(params[1]), np.int(params[2]), other_param_dict)
    return cross_val_score(clf, X, y, cv=3).mean()
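For context, a function like this is typically handed to hyperopt's fmin as the objective. A sketch of the driving call, with placeholder search-space bounds that are not the original project's (fmin minimizes, so the mean CV score is negated):

# Hypothetical driver for the objective above; the bounds and parameter
# meanings are placeholders, not taken from the original project.
from hyperopt import fmin, tpe, hp

space = [hp.loguniform('p0', -6, 0),     # e.g. a learning rate
         hp.loguniform('p1', -6, 0),     # e.g. a regularization strength
         hp.quniform('p2', 10, 100, 1)]  # e.g. an integer size parameter

best = fmin(lambda p: -hyperopt_train_test(p), space,
            algo=tpe.suggest, max_evals=50)
print(best)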