def k_vs_rms(START_K, END_K, STEP_K, training_data, labels, test_data,
             expected_labels, weights='distance'):
    """Sweep K and return an array of (K, RMSE) pairs on the test set."""
    num_points = int((END_K - START_K) / STEP_K) + 1
    points = np.zeros([num_points, 2])
    index = -1
    for K in range(START_K, END_K, STEP_K):
        print("k = " + str(K))
        index += 1
        output = knn_regression(K, training_data, labels, test_data, weights)
        # Drop rows where the prediction is NaN before scoring.
        v = np.column_stack((output, expected_labels))
        v = v[~np.isnan(v[:, 0]), :]
        RMSE = mean_squared_error(v[:, 0], v[:, 1]) ** 0.5
        points[index, 0] = K
        points[index, 1] = RMSE
    # range() excludes END_K, so trim the unused trailing row if present.
    if points[-1, 0] == 0 and points[-1, 1] == 0:
        points = points[:-1, :]
    return points
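# k_vs_rms assumes a knn_regression helper defined elsewhere in the source
# project. A minimal sketch of what it might look like, built on
# scikit-learn's KNeighborsRegressor (an assumption, not the original code):
from sklearn.neighbors import KNeighborsRegressor

def knn_regression(K, training_data, labels, test_data, weights='distance'):
    # Fit a K-nearest-neighbours regressor and predict the test targets.
    model = KNeighborsRegressor(n_neighbors=K, weights=weights)
    model.fit(training_data, labels)
    return model.predict(test_data)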
def train(df_train, df_test):
    train_x, train_y = extract_feature_and_y(df_train)
    print("train x and y shape: {0} and {1}".format(
        train_x.shape, train_y.shape))
    test_x, test_y = extract_feature_and_y(df_test)
    print("test x and y shape: {0} and {1}".format(
        test_x.shape, test_y.shape))
    # print("train x nan:", np.isnan(train_x).any())
    # print("train y nan:", np.isnan(train_y).any())
    # print("test x nan:", np.isnan(test_x).any())
    info = train_ridge_linear_model(train_x, train_y, test_x)
    # info = train_lasso_model(train_x, train_y, test_x)
    # info = train_EN_model(train_x, train_y, test_x)
    _mse = mean_squared_error(test_y, info["y"])
    _std = np.std(test_y - info["y"])
    print("MSE on test data: %f" % _mse)
    print("std of error on test data: %f" % _std)
    plot_y(train_y, info["train_y"], test_y, info["y"])
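# train, above, relies on several project-specific helpers. A hedged sketch
# of train_ridge_linear_model, inferred only from the keys the caller reads
# (info["y"] and info["train_y"]); the real implementation may differ:
from sklearn.linear_model import Ridge

def train_ridge_linear_model(train_x, train_y, test_x, alpha=1.0):
    # Fit a Ridge model and return predictions for both splits.
    model = Ridge(alpha=alpha)
    model.fit(train_x, train_y)
    return {"train_y": model.predict(train_x),
            "y": model.predict(test_x)}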
def train(X_train, y_train):
    model = Sequential()
    model.add(LSTM(
        lstm_neurons,
        batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
        stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # Manual loop over epochs so the LSTM state can be reset between them.
    for i in range(epochs):
        print('epoch', i + 1)
        model.fit(
            X_train,
            y_train,
            epochs=1,
            batch_size=batch_size,
            verbose=2,
            shuffle=False,
            # With stateful=True, both the training and validation portions
            # must divide evenly by batch_size.
            validation_split=0.33)
        model.reset_states()
    return model
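# A hedged driver for the stateful-LSTM trainer above; lstm_neurons,
# batch_size and epochs are module-level globals in the original snippet,
# and the sample count is chosen so both the training and validation
# portions divide evenly by batch_size.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

lstm_neurons, batch_size, epochs = 32, 8, 5
X_train = np.random.rand(240, 4, 1)   # (samples, timesteps, features)
y_train = np.random.rand(240)
model = train(X_train, y_train)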
def run_model(model, dtrain, predictor_var, target, scoring_method='neg_mean_squared_error'):
    # 'neg_mean_squared_error' is the modern scikit-learn scoring name;
    # older versions accepted 'mean_squared_error'.
    cv_method = KFold(n_splits=5)
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target],
                                cv=cv_method, scoring=scoring_method)
    # print(cv_scores, np.mean(cv_scores), np.sqrt((-1) * np.mean(cv_scores)))
    # Hold out everything from the year 2000 onwards as a validation set.
    dtrain_for_val = dtrain[dtrain['Year'] < 2000]
    dtest_for_val = dtrain[dtrain['Year'] > 1999]
    # cv_method = KFold(n_splits=5)
    # cv_scores_2 = cross_val_score(model, dtrain_for_val[predictor_var],
    #                               dtrain_for_val[target], cv=cv_method,
    #                               scoring=scoring_method)
    # print(cv_scores_2, np.mean(cv_scores_2))
    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini, dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)
    # print(math.sqrt(mean_squared_error(dtest_for_val['Footfall'], pred_for_val)))
def arima(series, durations, order):
    # Walk-forward validation: refit on the growing history at each step.
    # Note: `durations` is unused in the original snippet.
    X = series.values
    size = int(len(X) * 0.99)
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()
    for t in range(len(test)):
        # Use the `order` argument (the original hard-coded (5, 1, 0)).
        model = ARIMA(history, order=order)
        model_fit = model.fit()  # older statsmodels used fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    error = mean_squared_error(test, predictions)
    print('Test MSE: %.3f' % error)
    return predictions
# plot
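# A hedged usage sketch for the walk-forward ARIMA loop above; a synthetic
# pandas Series stands in for real data, and durations is unused by arima().
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
series = pd.Series(np.cumsum(rng.normal(size=300)))
predictions = arima(series, durations=None, order=(5, 1, 0))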
def prediction_curve(dmat, vals, steps, radius):
    """Return MSE from predicting values from neighbors at radial steps."""
    # Set null distances (greater than some threshold) to 0.
    # Not in general a great idea, but fine here because we don't
    # do anything with identity edges, and sums will be faster
    # if we don't have to worry about nans.
    dmat = np.nan_to_num(dmat)
    error_vals = []
    for step in steps:
        # Average the values of all points within `radius` of this distance.
        neighbors = (np.abs(dmat - step) < radius).astype(float)
        neighbors /= neighbors.sum(axis=1, keepdims=True)
        predicted = neighbors.dot(vals)
        m = ~np.isnan(predicted)
        error_vals.append(mean_squared_error(vals[m], predicted[m]))
    return np.array(error_vals)
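# Hypothetical usage: build a pairwise distance matrix from random 2-D
# points and measure prediction error at increasing radial distances.
import numpy as np

rng = np.random.default_rng(0)
pts = rng.random((50, 2))
dmat = np.linalg.norm(pts[:, None, :] - pts[None, :, :], axis=-1)
vals = rng.random(50)
curve = prediction_curve(dmat, vals, steps=np.linspace(0.1, 0.9, 9), radius=0.05)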
def _raw_rank(self, x, y, network):
    """Permutation importance: shuffle one feature at a time and record
    the resulting prediction error."""
    impt = np.zeros(x.shape[1])
    for i in range(x.shape[1]):
        hold = np.array(x[:, i])
        np.random.shuffle(x[:, i])
        # Handle both TensorFlow and SK-Learn models.
        if 'tensorflow' in str(type(network)).lower():
            pred = list(network.predict(x, as_iterable=True))
        else:
            pred = network.predict(x)
        # mean_squared_error returns the MSE, not the RMSE as the original
        # variable name suggested; the raw value still ranks features
        # correctly since the square root is monotonic.
        impt[i] = metrics.mean_squared_error(y, pred)
        x[:, i] = hold
    return impt
def score_regression(y, y_hat, report=True):
    """
    Create a regression score report (R2, RMSE, MAE).
    :param y: ground-truth values
    :param y_hat: predicted values
    :param report: if True, print the report string
    :return: (mae, report_string)
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)
    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"
    if report:
        print(report_string)
    return mae, report_string
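# Quick smoke test with dummy predictions; assumes r2_score,
# mean_squared_error and mean_absolute_error are imported from
# sklearn.metrics and sqrt from math, as the function body suggests.
import numpy as np

y = np.array([1.0, 2.0, 3.0, 4.0])
y_hat = np.array([1.1, 1.9, 3.2, 3.8])
mae, report = score_regression(y, y_hat)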
def _cross_val_score_loo_r0(lm, X, y):
    """
    Leave-one-out cross-validation scored with sklearn's mean_squared_error.

    Returns
    --------
    A list of per-fold mean squared error values.
    """
    if len(y.shape) == 1:
        y = np.array([y]).T
    # model_selection.LeaveOneOut() replaces the removed
    # cross_validation.LeaveOneOut(n) API the original used.
    kf = model_selection.LeaveOneOut()
    score_l = list()
    for tr, te in kf.split(X):
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        score_l.append(metrics.mean_squared_error(y[te, :], yp))
    return score_l
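# Hedged usage of the leave-one-out scorer above on synthetic data; assumes
# `from sklearn import model_selection, metrics` at module level.
import numpy as np
from sklearn import linear_model

X = np.random.rand(20, 3)
y = np.random.rand(20)
scores = _cross_val_score_loo_r0(linear_model.LinearRegression(), X, y)
print(np.mean(scores))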
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Grid-search a Ridge regression over a log-spaced alpha range.

    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
        (on modern scikit-learn, error metrics are prefixed with neg_,
        e.g. neg_mean_squared_error)
    """
    print('If scoring is an error metric rather than r2, the sign of the '
          'output score is reversed, since GridSearchCV maximizes scores!')
    print(xM.shape, yV.shape)
    clf = linear_model.Ridge()
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
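# Hedged usage on synthetic data; n_jobs=1 keeps the sketch single-process.
import numpy as np

xM = np.random.rand(100, 10)
yV = np.random.rand(100)
gs = gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=1)
print(gs.best_params_)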
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Legacy variant of gs_Ridge using the pre-0.18 scikit-learn API
    (the cross_validation and grid_search modules).

    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print(xM.shape, yV.shape)
    clf = linear_model.Ridge()
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV(clf, params, scoring=scoring, cv=kf_n,
                                  n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
def test_Validate():
    """Test that the validate function works correctly."""
    accuracy = an.validate(testing=True)
    val = mean_squared_error(y, slr.predict(X))
    assert np.allclose(accuracy, val)
    accuracy = an.validate(testing=True, X=X, y=y, metric=mean_squared_error)
    assert np.allclose(accuracy, val)
    accuracy = an.validate(testing=True, metric=[mean_squared_error, r2_score])
    val = [mean_squared_error(y, slr.predict(X)), r2_score(y, slr.predict(X))]
    assert np.allclose(accuracy, val)
    with pytest.raises(ValueError):
        an.validate(X=[1, 2, 3])
def eval_pred(y_true, y_pred, eval_type):
    # Dispatch on the requested evaluation metric.
    if eval_type == 'logloss':
        loss = ll(y_true, y_pred)
        print("logloss: ", loss)
        return loss
    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print("AUC: ", loss)
        return loss
    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print("rmse: ", loss)
        return loss
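# Example call; ll and AUC are assumed aliases for sklearn's log_loss and
# roc_auc_score in the original module.
import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0.1, 0.8, 0.7, 0.3, 0.9])
rmse = eval_pred(y_true, y_pred, 'rmse')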
######### BaseModel Class #########
def K_FoldValidation(estimator, XMatrix, yVector, numFolds):
    numTrainingExamples = len(XMatrix)
    K = numFolds
    if K < 2:
        print("Error, K must be greater than or equal to 2")
        exit(-10)
    elif K > numTrainingExamples:
        print("Error, K must be less than or equal to the number of training examples")
        exit(-11)
    # model_selection.KFold takes n_splits and is split on the data, unlike
    # the older cross_validation.KFold(n, k) call the original used.
    K_folds = model_selection.KFold(n_splits=K)
    for k, (train_index, test_index) in enumerate(K_folds.split(XMatrix)):
        X_train, X_test = XMatrix[train_index], XMatrix[test_index]
        y_train, y_test = yVector[train_index], yVector[test_index]
        # Fit
        estimator.fit(X_train, y_train, logdir='')
        # Predict and score
        score = metrics.mean_squared_error(estimator.predict(X_test), y_test)
        print('Iteration {0:d} MSE: {1:f}'.format(k + 1, score))
def test_input_data_continuous(self, learner, filename):
    # Load data
    data = Orange.data.Table(filename)
    # Train recommender
    recommender = learner(data)
    print(str(recommender) + ' trained')
    # Compute predictions
    y_pred = recommender(data)
    # Compute RMSE
    rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
    print('-> RMSE (input data; continuous): %.3f' % rmse)
    # Check correctness
    self.assertGreaterEqual(rmse, 0)
def test_input_data_discrete(self, learner, filename):
    # Load data
    data = Orange.data.Table(filename)
    # Train recommender
    recommender = learner(data)
    print(str(recommender) + ' trained')
    # Compute predictions
    y_pred = recommender(data)
    # Compute RMSE
    rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
    print('-> RMSE (input data; discrete): %.3f' % rmse)
    # Check correctness
    self.assertGreaterEqual(rmse, 0)
def mean_squared_error_(ground_truth, predictions):
    # Despite the name, this helper returns the root mean squared error.
    return mean_squared_error(ground_truth, predictions) ** 0.5
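# One common use for such a helper is wrapping it in a scorer for
# GridSearchCV (a sketch, not from the original source):
from sklearn.metrics import make_scorer

rmse_scorer = make_scorer(mean_squared_error_, greater_is_better=False)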
def train_and_eval_sklearn_regressor(clf, data):
    x_train = data['x_train']
    y_train = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']
    clf.fit(x_train, y_train)
    # Training-set metrics
    p = clf.predict(x_train)
    mse = MSE(y_train, p)
    rmse = sqrt(mse)
    mae = MAE(y_train, p)
    print("\n# training | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))
    # Test-set metrics
    p = clf.predict(x_test)
    mse = MSE(y_test, p)
    rmse = sqrt(mse)
    mae = MAE(y_test, p)
    print("# testing | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))
    return {'loss': rmse, 'rmse': rmse, 'mae': mae}
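# A hedged end-to-end driver; MSE and MAE are assumed to alias
# sklearn.metrics.mean_squared_error and mean_absolute_error, as the
# function body suggests.
from math import sqrt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as MSE, mean_absolute_error as MAE
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
data = {'x_train': x_tr, 'y_train': y_tr, 'x_test': x_te, 'y_test': y_te}
result = train_and_eval_sklearn_regressor(LinearRegression(), data)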