def test_linear_svr_evaluation(self):
"""
    Check that the evaluation results are the same in scikit-learn and coremltools.
"""
ARGS = [ {},
{'C': 0.5, 'epsilon': 0.25},
{'dual': False, 'loss': 'squared_epsilon_insensitive'},
{'tol': 0.005},
{'fit_intercept': False},
{'intercept_scaling': 1.5}
]
input_names = self.scikit_data.feature_names
df = pd.DataFrame(self.scikit_data.data, columns=input_names)
for cur_args in ARGS:
print(cur_args)
cur_model = LinearSVR(**cur_args)
cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
spec = convert(cur_model, input_names, 'target')
df['prediction'] = cur_model.predict(self.scikit_data.data)
metrics = evaluate_regressor(spec, df)
        self.assertAlmostEqual(metrics['max_error'], 0)
def test_svr():
# Test Support Vector Regression
diabetes = datasets.load_diabetes()
for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0),
svm.NuSVR(kernel='linear', nu=.4, C=10.),
svm.SVR(kernel='linear', C=10.),
                svm.LinearSVR(C=10.),
):
clf.fit(diabetes.data, diabetes.target)
assert_greater(clf.score(diabetes.data, diabetes.target), 0.02)
# non-regression test; previously, BaseLibSVM would check that
# len(np.unique(y)) < 2, which must only be done for SVC
svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data)))
svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data)))
Source: figure.classification.vs.regression.py (project: microbiome-summer-school-2017, author: aldro61)
def make_regression_example(axis, random_state):
X, y = make_regression(n_samples=100, n_features=1, noise=30.0, random_state=random_state)
axis.scatter(X[:, 0], y, color="blue", s=10, label="Patients")
clf = LinearSVR().fit(X, y)
axis.plot(X[:, 0], clf.predict(X), color="black", label="Model")
    axis.tick_params(labelbottom=False, labelleft=False)
    axis.set_xlabel("Gene 1")
    axis.set_ylabel("Survived (years)")
    axis.legend()
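A minimal driver sketch for make_regression_example, assuming the snippet's module-level imports (make_regression, LinearSVR, matplotlib) are in place; the figure and axis names below are illustrative.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()  # single panel; the function draws onto the given axis
make_regression_example(ax, random_state=0)
plt.show()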
def convert(model, features, target):
"""Convert a LinearSVR model to the protobuf spec.
Parameters
----------
model: LinearSVR
A trained LinearSVR model.
    features: [str]
        Name of the input columns.
target: str
Name of the output column.
Returns
-------
model_spec: An object of type Model_pb.
Protobuf representation of the model
"""
    if not _HAS_SKLEARN:
raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
# Check the scikit learn model
_sklearn_util.check_expected_type(model, _LinearSVR)
_sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_'))
return _MLModel(_linear_regression._convert(model, features, target))
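A hedged usage sketch for the converter above: fit a LinearSVR and pass it to convert with feature and target names. The dataset and names are illustrative; _MLModel and _linear_regression are assumed to come from the surrounding coremltools module.
from sklearn.datasets import load_diabetes
from sklearn.svm import LinearSVR

data = load_diabetes()
model = LinearSVR(C=1.0).fit(data.data, data.target)
# feature names become the spec's input columns, 'target' its output column
spec = convert(model, list(data.feature_names), 'target')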
def __init__(self, classifier=LinearSVR):
self.svc = classifier()
self.num_terms = -1
def predict(data, priceToPredict):
openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]
    clf = svm.LinearSVR()
    clf.fit(openingPriceTrain, closingPriceTrain)
    score = clf.score(openingPriceTest, closingPriceTest)  # no need to refit before scoring
    # print(score)
    fig, ax = plotter.subplots()
    ax.scatter(openingPriceTrain, closingPriceTrain)
    ax.scatter(closingPriceTest, clf.predict(openingPriceTest))
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    # plotter.show()
    closingPriceTestArray = np.reshape(closingPriceTest, -1)
    predictedArray = np.reshape(clf.predict(openingPriceTest), -1)
    print(pearsonr(closingPriceTestArray, predictedArray))
    openingPriceToPredict = np.array([priceToPredict])
    print(clf.predict(openingPriceToPredict))
    return clf.predict(openingPriceToPredict)  # already 2-D; no extra wrapping needed
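An illustrative driver for predict(), assuming the original snippet's module-level imports (numpy, matplotlib as plotter, scipy.stats.pearsonr, sklearn.svm); the dict keys mirror those unpacked at the top of the function, and the values are hypothetical.
import numpy as np

data = {
    "openingPriceTrain": np.array([[10.0], [11.0], [12.0], [13.0]]),
    "openingPriceTest": np.array([[14.0], [15.0], [16.0]]),
    "closingPriceTrain": np.array([10.5, 11.4, 12.6, 13.5]),
    "closingPriceTest": np.array([14.4, 15.6, 16.3]),
}
predict(data, [14.2])  # predicted closing price for an opening price of 14.2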
def _initEstimator(self, X, Y):
#estimator = svm.SVR(kernel="linear",shrinking=False)
estimator = svm.LinearSVR(
loss="squared_epsilon_insensitive",
dual=False,
random_state=self.random_state)
tuned_parameters = {'C': [self.C], 'epsilon': [self.epsilon]}
if self.C is None:
tuned_parameters["C"] = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
if self.epsilon is None:
tuned_parameters["epsilon"] = [0.0001, 0.001, 0.01, 0.1, 1, 2, 5]
n = len(X)
if n <= 20:
cv = 3
else:
cv = 7
gridsearch = GridSearchCV(estimator,
tuned_parameters,
scoring="r2",
n_jobs=-1 if self.parallel else 1,
cv=cv,
verbose=0)
gridsearch.fit(X, Y)
self._hyper_C = gridsearch.best_params_['C']
self._hyper_epsilon = gridsearch.best_params_['epsilon']
self._best_clf_score = gridsearch.best_score_
    self._svm_clf = best_clf = gridsearch.best_estimator_
    # ravel so the weight vector is 1-D regardless of coef_'s shape,
    # which also makes the L1 norm below a true vector norm
    self._svm_coef = np.ravel(best_clf.coef_)
    self._svm_bias = -best_clf.intercept_[0]
    self._svm_L1 = np.linalg.norm(self._svm_coef, ord=1)
    prediction = best_clf.predict(X)
    self._svm_loss = np.sum(np.abs(Y - prediction))
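A standalone sketch of the grid-search pattern in _initEstimator, assuming the diabetes dataset; the parameter grids mirror the defaults used above when C and epsilon are left unset.
from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

X, y = datasets.load_diabetes(return_X_y=True)
grid = GridSearchCV(
    svm.LinearSVR(loss="squared_epsilon_insensitive", dual=False),
    {"C": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
     "epsilon": [0.0001, 0.001, 0.01, 0.1, 1, 2, 5]},
    scoring="r2", cv=3)
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)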
def test_AdaBoostRegressor_base_regr(*data):
    '''
    Test AdaBoostRegressor with different base regressors: the default decision tree vs. LinearSVR
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
from sklearn.svm import LinearSVR
X_train,X_test,y_train,y_test=data
fig=plt.figure()
regrs=[ensemble.AdaBoostRegressor(),
ensemble.AdaBoostRegressor(base_estimator=LinearSVR(epsilon=0.01,C=100))]
labels=["Decision Tree Regressor","Linear SVM Regressor"]
for i ,regr in enumerate(regrs):
ax=fig.add_subplot(2,1,i+1)
regr.fit(X_train,y_train)
## graph
estimators_num=len(regr.estimators_)
X=range(1,estimators_num+1)
        ax.plot(list(X),list(regr.staged_score(X_train,y_train)),label="Training score")
ax.plot(list(X),list(regr.staged_score(X_test,y_test)),label="Testing score")
ax.set_xlabel("estimator num")
ax.set_ylabel("score")
ax.legend(loc="lower right")
ax.set_ylim(-1,1)
ax.set_title("Base_Estimator:%s"%labels[i])
plt.suptitle("AdaBoostRegressor")
plt.show()
def test_LinearSVR(*data):
    '''
    Test LinearSVR with default parameters
    :param data: train_data, test_data, train_target, test_target
    :return: None
    '''
X_train,X_test,y_train,y_test=data
regr=svm.LinearSVR()
regr.fit(X_train,y_train)
print('Coefficients:{0}, intercept {1}'.format(regr.coef_,regr.intercept_))
print('Score: {0}' .format(regr.score(X_test, y_test)))
def test_LinearSVR_loss(*data):
    '''
    Test LinearSVR with different loss functions
    :param data: train_data, test_data, train_target, test_target
    :return: None
    '''
X_train,X_test,y_train,y_test=data
losses=['epsilon_insensitive','squared_epsilon_insensitive']
for loss in losses:
regr=svm.LinearSVR(loss=loss)
regr.fit(X_train,y_train)
print("loss?{0}".format(loss))
print('Coefficients:{0}, intercept {1}'.format(regr.coef_,regr.intercept_))
print('Score: {0}' .format(regr.score(X_test, y_test)))
def test_LinearSVR_epsilon(*data):
'''
test the performance with different epsilon
:param data: train_data,test_data, train_target, test_target
:return: None
'''
X_train,X_test,y_train,y_test=data
epsilons=np.logspace(-2,2)
train_scores=[]
test_scores=[]
for epsilon in epsilons:
regr=svm.LinearSVR(epsilon=epsilon,loss='squared_epsilon_insensitive')
regr.fit(X_train,y_train)
train_scores.append(regr.score(X_train, y_train))
test_scores.append(regr.score(X_test, y_test))
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
    ax.plot(epsilons,train_scores,label="Training score",marker='+')
    ax.plot(epsilons,test_scores,label="Testing score",marker='o')
    ax.set_title("LinearSVR_epsilon")
ax.set_xscale("log")
ax.set_xlabel(r"$\epsilon$")
ax.set_ylabel("score")
ax.set_ylim(-1,1.05)
ax.legend(loc="best",framealpha=0.5)
plt.show()
def test_LinearSVR_C(*data):
'''
test the performance with different C
:param data: train_data,test_data, train_target, test_target
:return: None
'''
X_train,X_test,y_train,y_test=data
Cs=np.logspace(-1,2)
train_scores=[]
test_scores=[]
for C in Cs:
regr=svm.LinearSVR(epsilon=0.1,loss='squared_epsilon_insensitive',C=C)
regr.fit(X_train,y_train)
train_scores.append(regr.score(X_train, y_train))
test_scores.append(regr.score(X_test, y_test))
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
    ax.plot(Cs,train_scores,label="Training score",marker='+')
    ax.plot(Cs,test_scores,label="Testing score",marker='o')
    ax.set_title("LinearSVR_C")
ax.set_xscale("log")
ax.set_xlabel(r"C")
ax.set_ylabel("score")
ax.set_ylim(-1,1.05)
ax.legend(loc="best",framealpha=0.5)
plt.show()
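A hedged driver sketch for test_AdaBoostRegressor_base_regr and the test_LinearSVR_* functions above, assuming the diabetes dataset; the 75/25 split and random_state are illustrative.
from sklearn import datasets
from sklearn.model_selection import train_test_split

diabetes = datasets.load_diabetes()
# returns X_train, X_test, y_train, y_test in the order the tests expect
data = train_test_split(diabetes.data, diabetes.target,
                        test_size=0.25, random_state=0)
test_AdaBoostRegressor_base_regr(*data)
test_LinearSVR(*data)
test_LinearSVR_loss(*data)
test_LinearSVR_epsilon(*data)
test_LinearSVR_C(*data)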
def lr(train_sample, validation_sample, features):
    # Despite the name, this fits a LinearSVR on log1p-transformed volumes.
    log_base = np.e
    model = LinearSVR(C=1, epsilon=0.1)
    model.fit(train_sample[features],
              np.log1p(train_sample['volume']) / np.log(log_base))
    lr_prob = model.predict(validation_sample[features])
    lr_prob = np.power(log_base, lr_prob) - 1  # invert the log1p transform
    print_mape(validation_sample['volume'], lr_prob, 'LR')
    return lr_prob
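A quick sanity check of the transform round-trip in lr(): with log_base = e, log1p(v)/log(base) followed by base**p - 1 recovers v exactly.
import numpy as np

v = np.array([0.0, 1.0, 10.0, 100.0])
p = np.log1p(v) / np.log(np.e)
assert np.allclose(np.power(np.e, p) - 1, v)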
def test_linearsvr():
    # check that SVR(kernel='linear') and LinearSVR() give
    # comparable results
diabetes = datasets.load_diabetes()
lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target)
score1 = lsvr.score(diabetes.data, diabetes.target)
svr = svm.SVR(kernel='linear', C=1e3).fit(diabetes.data, diabetes.target)
score2 = svr.score(diabetes.data, diabetes.target)
assert np.linalg.norm(lsvr.coef_ - svr.coef_) / np.linalg.norm(svr.coef_) < .1
assert np.abs(score1 - score2) < 0.1
def test_linearsvx_loss_penalty_deprecations():
X, y = [[0.0], [1.0]], [0, 1]
msg = ("loss='%s' has been deprecated in favor of "
"loss='%s' as of 0.16. Backward compatibility"
" for the %s will be removed in %s")
# LinearSVC
# loss l1 --> hinge
assert_warns_message(DeprecationWarning,
msg % ("l1", "hinge", "loss='l1'", "1.0"),
svm.LinearSVC(loss="l1").fit, X, y)
# loss l2 --> squared_hinge
assert_warns_message(DeprecationWarning,
msg % ("l2", "squared_hinge", "loss='l2'", "1.0"),
svm.LinearSVC(loss="l2").fit, X, y)
# LinearSVR
# loss l1 --> epsilon_insensitive
assert_warns_message(DeprecationWarning,
msg % ("l1", "epsilon_insensitive", "loss='l1'",
"1.0"),
svm.LinearSVR(loss="l1").fit, X, y)
# loss l2 --> squared_epsilon_insensitive
assert_warns_message(DeprecationWarning,
msg % ("l2", "squared_epsilon_insensitive",
"loss='l2'", "1.0"),
svm.LinearSVR(loss="l2").fit, X, y)
def test_svr_coef_sign():
# Test that SVR(kernel="linear") has coef_ with the right sign.
# Non-regression test for #2933.
X = np.random.RandomState(21).randn(10, 3)
y = np.random.RandomState(12).randn(10)
for svr in [svm.SVR(kernel='linear'), svm.NuSVR(kernel='linear'),
svm.LinearSVR()]:
svr.fit(X, y)
assert_array_almost_equal(svr.predict(X),
np.dot(X, svr.coef_.ravel()) + svr.intercept_)