def drawValidationCurve(self):
    """
    Draw the validation curve of a decision tree regressor over max_depth.
    :return: None
    """
    X, y = self.X_train, self.y_train.ravel()
    indices = np.arange(y.shape[0])
    # np.random.shuffle(indices)
    X, y = X[indices], y[indices]
    param_range = range(2, 60)
    # 'neg_mean_squared_error' returns negative MSE, so flip the sign below.
    train_scores, valid_scores = validation_curve(
        DecisionTreeRegressor(max_features=None), X, y,
        param_name="max_depth", param_range=param_range,
        cv=5, scoring='neg_mean_squared_error')
    train_scores = -train_scores
    valid_scores = -valid_scores
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    valid_scores_mean = np.mean(valid_scores, axis=1)
    valid_scores_std = np.std(valid_scores, axis=1)
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(param_range, valid_scores_mean - valid_scores_std,
                     valid_scores_mean + valid_scores_std, alpha=0.1, color="g")
    plt.plot(param_range, train_scores_mean, 'o-', color="r",
             label="Training MSE")
    plt.plot(param_range, valid_scores_mean, '*-', color="g",
             label="Cross-validation MSE")
    plt.legend(loc="best")
    plt.xlabel('Max Depth')
    plt.ylabel('MSE')
    plt.title('Validation Curve with Decision Tree Regression\non the max_depth parameter')
    plt.grid(True)
    plt.show()
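The method above assumes a surrounding class that provides `self.X_train` and `self.y_train`. A minimal standalone sketch of the same validation-curve pattern, with synthetic regression data standing in for the class attributes (the dataset and its parameters are assumptions):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import validation_curve
from sklearn.tree import DecisionTreeRegressor

# Synthetic data stands in for self.X_train / self.y_train.
X, y = make_regression(n_samples=500, n_features=10, noise=10.0, random_state=0)
param_range = np.arange(2, 60)
train_scores, valid_scores = validation_curve(
    DecisionTreeRegressor(), X, y,
    param_name="max_depth", param_range=param_range,
    cv=5, scoring="neg_mean_squared_error")

# Scores are negative MSE; flip the sign before plotting.
plt.plot(param_range, -train_scores.mean(axis=1), 'o-', label="Training MSE")
plt.plot(param_range, -valid_scores.mean(axis=1), '*-', label="Cross-validation MSE")
plt.xlabel("max_depth")
plt.ylabel("MSE")
plt.legend(loc="best")
plt.show()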
def test_search_cv_results_none_param():
    X, y = [[1], [2], [3], [4], [5]], [0, 0, 0, 0, 1]
    estimators = (DecisionTreeRegressor(), DecisionTreeClassifier())
    est_parameters = {"random_state": [0, None]}
    cv = KFold(random_state=0)
    for est in estimators:
        grid_search = dcv.GridSearchCV(est, est_parameters, cv=cv).fit(X, y)
        assert_array_equal(grid_search.cv_results_['param_random_state'],
                           [0, None])
def regress(y, x, test_x=[]):
    if len(test_x) == 0:
        test_x = x
    clf = DecisionTreeRegressor()
    clf.fit(x, y)
    y_p = clf.predict(test_x)
    plt.scatter(y, y_p)
def ada_boost_tree_grid_search():
    ada_boost_tree_grid = {
        'base_estimator__max_features': ['sqrt'],
        'base_estimator__splitter': ['best', 'random'],
        'base_estimator__min_samples_split': [2, 4],
        'base_estimator__max_depth': [1, 3],
        'n_estimators': [50, 100, 1000],
        'learning_rate': [.001, .01, .1],
        'loss': ['linear', 'square', 'exponential']
    }
    abr = AdaBoostRegressor(DecisionTreeRegressor())
    return ada_boost_tree_grid, abr
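A hedged sketch of how the grid and base estimator returned above might be wired into a grid search; `X_train` and `y_train` are assumed to be defined elsewhere:

from sklearn.model_selection import GridSearchCV

param_grid, abr = ada_boost_tree_grid_search()
search = GridSearchCV(abr, param_grid, cv=5,
                      scoring='neg_mean_squared_error', n_jobs=-1)
search.fit(X_train, y_train)  # X_train / y_train assumed to exist
print(search.best_params_, search.best_score_)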
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeRegressor

    scikit_data = load_boston()
    scikit_model = DecisionTreeRegressor(random_state=1)
    scikit_model.fit(scikit_data['data'], scikit_data['target'])

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
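In the coremltools tests this fixture is typically followed by a conversion step; a minimal sketch of what that call might look like, assuming the `skl_converter` alias used in the other snippets here and an output name of 'target':

from coremltools.converters import sklearn as skl_converter

# Convert the trained regressor to a Core ML spec
# (feature names from the Boston dataset; 'target' output name is an assumption).
spec = skl_converter.convert(scikit_model,
                             list(scikit_data.feature_names),
                             'target')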
def test_conversion_bad_inputs(self):
    # Error on converting an untrained model
    with self.assertRaises(Exception):
        model = DecisionTreeRegressor()
        spec = skl_converter.convert(model, 'data', 'out')

    # Check the expected class during conversion.
    from sklearn.preprocessing import OneHotEncoder
    with self.assertRaises(Exception):
        model = OneHotEncoder()
        spec = skl_converter.convert(model, 'data', 'out')
# From the coremltools project (Apple): test_decision_tree_regression_numeric.py
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeRegressor

    # Load data and train model
    scikit_data = load_boston()
    self.scikit_data = scikit_data
    self.X = scikit_data['data']
    self.target = scikit_data['target']
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def spot_check(X, y, problem_type='regression'):
    if problem_type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]
    splits = 5
    scores = []
    for model, model_name in models:
        # get average cross-validated score for each model
        score = check_model(model, splits, X, y)
        scores.append(score)
    model_names = [name for _, name in models]
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
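`spot_check` relies on a `check_model` helper that is not shown; a plausible sketch of it, assuming it returns the mean cross-validated score:

from sklearn.model_selection import KFold, cross_val_score

def check_model(model, splits, X, y):
    # Hypothetical helper: average score over a k-fold cross-validation.
    cv = KFold(n_splits=splits, shuffle=True, random_state=0)
    return cross_val_score(model, X, y, cv=cv).mean()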
def get_classifier(self, X, Y):
    """ Build and fit the estimator.
    :param X: training features
    :param Y: training targets
    :return: fitted estimator
    """
    # rng = np.random.RandomState(1)
    clf = AdaBoostRegressor(DecisionTreeRegressor())
    clf.fit(X, Y)
    return clf
def get_classifier(self, X, Y):
    """ Build and fit the estimator.
    :param X: training features
    :param Y: training targets
    :return: fitted estimator
    """
    clf = DecisionTreeRegressor()
    clf.fit(X, Y)
    return clf
def get_classifier(self, X, Y):
    """ Build and fit the estimator.
    :param X: training features
    :param Y: training targets
    :return: fitted estimator
    """
    clf = DecisionTreeRegressor(max_depth=4)
    clf.fit(X, Y)
    return clf
def test_boston(self):
    from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn
    model = DecisionTreeRegressor(max_n_splits=3)
    model_sklearn = DecisionTreeRegressorSklearn()

    dataset = load_boston()
    mse = []
    mse_sklearn = []
    for fold in range(5):
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)
        model.fit(X_train, y_train)
        y = model.predict(X_test)
        mse.append(mean_squared_error(y, y_test))
        model_sklearn.fit(X_train, y_train)
        y = model_sklearn.predict(X_test)
        mse_sklearn.append(mean_squared_error(y, y_test))

    mean_mse = np.mean(mse)
    mean_mse_sklearn = np.mean(mse_sklearn)
    print(mean_mse, mean_mse_sklearn)
    # Check that our model's MSE is within 20% of scikit-learn's.
    self.assertTrue(np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn < 0.2)
def test_boston(self):
    from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn
    model = DecisionTreeRegressor(tree_type='oblivious', max_n_splits=3)
    model_sklearn = DecisionTreeRegressorSklearn()

    dataset = load_boston()
    mse = []
    mse_sklearn = []
    for fold in range(5):
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)
        model.fit(X_train, y_train)
        y = model.predict(X_test)
        mse.append(mean_squared_error(y, y_test))
        model_sklearn.fit(X_train, y_train)
        y = model_sklearn.predict(X_test)
        mse_sklearn.append(mean_squared_error(y, y_test))

    mean_mse = np.mean(mse)
    mean_mse_sklearn = np.mean(mse_sklearn)
    print(mean_mse, mean_mse_sklearn)
    # Check that our model's MSE is within 50% of scikit-learn's.
    self.assertTrue(np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn < 0.5)

# def test_check_estimators(self):
#     """
#     Tests that models adhere to the scikit-learn Estimator interface.
#     """
#     check_estimator(DecisionTreeClassifier)
def __init__(self, problem_type):
    self.problem_type = problem_type
    if self._is_classification():
        self.model = DecisionTreeClassifier(random_state=RANDOM_STATE + 1)
    elif self._is_regression():
        self.model = DecisionTreeRegressor(random_state=RANDOM_STATE + 2)
    else:
        raise NotImplementedError
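The `_is_classification` / `_is_regression` helpers and the `RANDOM_STATE` constant referenced above are not shown; a minimal sketch of what they might look like (names are taken from the snippet, the value and bodies are assumptions):

RANDOM_STATE = 42  # assumed value; only its existence is implied above

# These helpers would live on the same class as the __init__ above.
def _is_classification(self):
    return self.problem_type == 'classification'

def _is_regression(self):
    return self.problem_type == 'regression'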
def evaluate_decision_tree_regression(X, y):
    tree = DecisionTreeRegressor(max_depth=3)
    tree.fit(X, y)
    sort_index = X.flatten().argsort()
    lin_regplot(X[sort_index], y[sort_index], tree)
    plt.xlabel('% lower status of the population [LSTAT]')
    plt.ylabel("Price in $1000's [MEDV]")
    plt.show()
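`lin_regplot` is used above but not defined here; a simple sketch of such a helper (scatter of the raw points with the model's predictions overlaid), following the usual pattern for this kind of plot:

import matplotlib.pyplot as plt

def lin_regplot(X, y, model):
    # Hypothetical helper: scatter the data and draw the fitted predictions.
    plt.scatter(X, y, c='steelblue', edgecolor='white', s=70)
    plt.plot(X, model.predict(X), color='black', lw=2)
    return None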
def __init__(self, base_estimator=None, n_estimators=50, max_features=1.0,
             max_depth=6, learning_rate=1.0, loss='linear', random_state=None):
    if base_estimator and base_estimator == 'etr':
        base_estimator = ExtraTreeRegressor(max_depth=max_depth,
                                            max_features=max_features)
    else:
        base_estimator = DecisionTreeRegressor(max_depth=max_depth,
                                               max_features=max_features)
    self.model = sklearn.ensemble.AdaBoostRegressor(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=random_state,
        loss=loss)
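For reference, a standalone sketch of the estimator this constructor builds with the default arguments when base_estimator='etr' (built directly, without the wrapper class, whose name is not shown above):

import sklearn.ensemble
from sklearn.tree import ExtraTreeRegressor

# Equivalent of __init__(base_estimator='etr') with the defaults above.
model = sklearn.ensemble.AdaBoostRegressor(
    base_estimator=ExtraTreeRegressor(max_depth=6, max_features=1.0),
    n_estimators=50,
    learning_rate=1.0,
    loss='linear',
    random_state=None)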
def test_DecisionTreeRegressor_splitter(*data):
    '''
    Test the performance with different splitters.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    splitters = ['best', 'random']
    for splitter in splitters:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter {0}".format(splitter))
        print("Training score: {0}".format(regr.score(X_train, y_train)))
        print("Testing score: {0}".format(regr.score(X_test, y_test)))
def test_DecisionTreeRegressor_depth(*data, maxdepth):
    '''
    Test the score with different max_depth values.
    :param data: train_data, test_data, train_value, test_value
    :param maxdepth: an integer
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))

    ## graph
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(depths, training_scores, label="training score")
    ax.plot(depths, testing_scores, label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()
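A hedged example of how the two test helpers above might be invoked; the synthetic sine data is an assumption:

import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = 5 * rng.rand(200, 1)
y = np.sin(X).ravel() + 0.1 * rng.randn(200)
# train_test_split returns (X_train, X_test, y_train, y_test),
# matching the unpacking order inside the helpers.
data = train_test_split(X, y, test_size=0.25, random_state=0)

test_DecisionTreeRegressor_splitter(*data)
test_DecisionTreeRegressor_depth(*data, maxdepth=20)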
def test_presort_sparse():
    ests = (DecisionTreeClassifier(presort=True),
            DecisionTreeRegressor(presort=True))
    sparse_matrices = (csr_matrix, csc_matrix, coo_matrix)

    y, X = datasets.make_multilabel_classification(random_state=0,
                                                   n_samples=50,
                                                   n_features=1,
                                                   n_classes=20)
    y = y[:, 0]

    for est, sparse_matrix in product(ests, sparse_matrices):
        yield check_presort_sparse, est, sparse_matrix(X), y
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)
    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train,
                 y_train)