import numpy as np
from sklearn import linear_model


def train_lassolars_model(train_x, train_y, predict_x):
    print_title("LassoLars Regressor")  # print_title is assumed from the surrounding module
    # Select the regularization strength alpha by 10-fold cross-validation.
    reg = linear_model.LassoLarsCV(
        cv=10, n_jobs=3, max_iter=2000, normalize=False)
    reg.fit(train_x, train_y)
    print("alphas and cv_alphas: {0} and {1}".format(
        reg.alphas_.shape, reg.cv_alphas_.shape))
    print("alphas[%d]: %s" % (len(reg.cv_alphas_), reg.cv_alphas_))
    print("mse shape: {0}".format(reg.cv_mse_path_.shape))
    # Look up the cross-validation MSE at the selected alpha.
    index = np.where(reg.cv_alphas_ == reg.alpha_)
    _mse_v = np.mean(reg.cv_mse_path_[index, :])
    print("mse value: %f" % _mse_v)
    print("best alpha: %f" % reg.alpha_)
    best_alpha = reg.alpha_
    # Refit a plain LassoLars model at the selected alpha.
    reg = linear_model.LassoLars(alpha=best_alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zero coefs: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    return {'y': predict_y, "coef": reg.coef_}
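A minimal usage sketch for train_lassolars_model, assuming its module-level dependencies (numpy, sklearn's linear_model, print_title) are in place; the synthetic data and the 400/100 split are illustrative only:

from sklearn.datasets import make_regression

X, y = make_regression(n_samples=500, n_features=20, noise=5.0, random_state=0)
result = train_lassolars_model(X[:400], y[:400], X[400:])
print(result["coef"])   # sparse coefficient vector chosen by LassoLars
print(result["y"][:5])  # predictions for the held-out rows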
Python LassoLarsCV() usage examples
def test_get_errors_param(self):
    """
    Test that we can get the CV errors for alpha selection from known models
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes"""
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  mutation_rate=0.2, crossover_rate=0.8,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='epsilon_lexicase', tourn_size=2,
                  random_state=0, verbosity=0,
                  disable_update_check=False, fit_choice='mse')
    learner.fit(boston.data[:300], boston.target[:300])
    score = learner.score(boston.data[:300], boston.target[:300])
    print("learner:", learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:], boston.target[300:])
    print("train score:", score, "test score:", test_score,
          "test r2:", r2_score(boston.target[300:], yhat_test))
    assert yhat_test.shape == boston.target[300:].shape
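For context, a hedged sketch of driving FEW the same way outside the test suite; the `from few import FEW` import path and the synthetic data are assumptions, not taken from the snippet above:

from few import FEW  # import path assumed from the package name
from sklearn.linear_model import LassoLarsCV
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=5, noise=1.0, random_state=0)
learner = FEW(generations=1, population_size=5, ml=LassoLarsCV(), random_state=0)
learner.fit(X[:80], y[:80])
print(learner.score(X[80:], y[80:]))  # R^2 on the held-out rows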
def test_few_with_parents_weight():
    """test_few.py: few performs without error with parent pressure for selection"""
    np.random.seed(1006987)
    boston = load_boston()
    d = np.column_stack((boston.data, boston.target))
    np.random.shuffle(d)
    features = d[:, 0:-1]
    target = d[:, -1]
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  mutation_rate=1, crossover_rate=1,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='tournament', fit_choice='r2', tourn_size=2,
                  random_state=0, verbosity=0,
                  disable_update_check=False, weight_parents=True)
    learner.fit(features[:300], target[:300])
    few_score = learner.score(features[:300], target[:300])
    test_score = learner.score(features[300:], target[300:])
    print("few score:", few_score)
    print("few test score:", test_score)
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())

    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def test_get_alphas_param_lassolars(self):
    """
    Assert that we can get alphas from lasso lars.
    """
    X, y = make_regression()
    model = AlphaSelection(LassoLarsCV())
    model.fit(X, y)
    try:
        malphas = model._find_alphas_param()
        self.assertTrue(len(malphas) > 0)
    except YellowbrickValueError:
        self.fail("could not find alphas on {}".format(model.name))
def _fit_model(x, y, names, operators, **kw):
    steps = [("trafo", LibTrafo(names, operators)), ("lasso", LassoLarsCV(**kw))]
    model = Pipeline(steps).fit(x, y)
    return model, model.score(x, y)
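LibTrafo is project-specific, but the pipeline pattern itself is plain sklearn; a sketch with PolynomialFeatures standing in as a hypothetical feature-expanding transformer:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoLarsCV
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=1.0, random_state=0)
# PolynomialFeatures is a stand-in for LibTrafo, not the project's transformer.
model = Pipeline([("trafo", PolynomialFeatures(degree=2)),
                  ("lasso", LassoLarsCV(cv=5))])
model.fit(X, y)
print(model.score(X, y))  # LassoLarsCV prunes the expanded feature set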
def test_few_at_least_as_good_as_default():
    """test_few.py: few performs at least as well as the default ML"""
    np.random.seed(1006987)
    boston = load_boston()
    d = np.column_stack((boston.data, boston.target))
    np.random.shuffle(d)
    features = d[:, 0:-1]
    target = d[:, -1]
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='tournament')
    learner.fit(features[:300], target[:300])
    few_score = learner.score(features[:300], target[:300])
    few_test_score = learner.score(features[300:], target[300:])

    lasso = LassoLarsCV()
    lasso.fit(features[:300], target[:300])
    lasso_score = lasso.score(features[:300], target[:300])
    lasso_test_score = lasso.score(features[300:], target[300:])
    print("few score:", few_score, "lasso score:", lasso_score)
    print("few test score:", few_test_score, "lasso test score:",
          lasso_test_score)
    assert round(few_score, 8) >= round(lasso_score, 8)
    print("lasso coefficients:", lasso.coef_)
def test_lars_cv():
    # Test the LassoLarsCV object by checking that the optimal alpha
    # increases as the number of samples decreases.
    # This property is not actually guaranteed in general and is just a
    # property of the given dataset, with the given steps chosen.
    old_alpha = 0
    lars_cv = linear_model.LassoLarsCV()
    for length in (400, 200, 100):
        X = diabetes.data[:length]
        y = diabetes.target[:length]
        lars_cv.fit(X, y)
        np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
        old_alpha = lars_cv.alpha_
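This test references a module-level `diabetes` dataset; in scikit-learn's own test module it is loaded once at import time, roughly as follows:

import numpy as np
from sklearn import datasets, linear_model

diabetes = datasets.load_diabetes()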
machine_learning.py (project: -Python-Analysis_of_wine_quality, author: ekolik)
def lasso_regr(wine_set):
    pred = wine_set[["density", 'alcohol', 'sulphates', 'pH', 'volatile_acidity',
                     'chlorides', 'fixed_acidity', 'citric_acid', 'residual_sugar',
                     'free_sulfur_dioxide', 'total_sulfur_dioxide']]
    predictors = pred.copy()
    targets = wine_set.quality

    # standardize predictors to have mean=0 and sd=1
    predictors = pd.DataFrame(preprocessing.scale(predictors))
    predictors.columns = pred.columns

    # split into training and testing sets
    pred_train, pred_test, tar_train, tar_test = train_test_split(
        predictors, targets, test_size=.3, random_state=123)

    # specify the lasso regression model
    model = LassoLarsCV(cv=10, precompute=False).fit(pred_train, tar_train)

    print('Predictors and their regression coefficients:')
    d = dict(zip(predictors.columns, model.coef_))
    for k in d:
        print(k, ':', d[k])

    # plot coefficient progression
    m_log_alphas = -np.log10(model.alphas_)
    plt.plot(m_log_alphas, model.coef_path_.T)
    print('\nAlpha:', model.alpha_)
    plt.axvline(-np.log10(model.alpha_), linestyle="dashed", color='k', label='alpha CV')
    plt.ylabel("Regression coefficients")
    plt.xlabel("-log(alpha)")
    plt.title('Regression coefficients progression for Lasso paths')
    plt.show()

    # plot mean squared error for each fold
    m_log_alphascv = -np.log10(model.cv_alphas_)
    plt.plot(m_log_alphascv, model.cv_mse_path_, ':')
    plt.plot(m_log_alphascv, model.cv_mse_path_.mean(axis=-1), 'k',
             label='Average across the folds', linewidth=2)
    plt.legend()
    plt.xlabel('-log(alpha)')
    plt.ylabel('Mean squared error')
    plt.title('Mean squared error on each fold')
    plt.show()

    # mean squared error from training and test data
    train_error = mean_squared_error(tar_train, model.predict(pred_train))
    test_error = mean_squared_error(tar_test, model.predict(pred_test))
    print('\nMean squared error for training data:', train_error)
    print('Mean squared error for test data:', test_error)

    rsquared_train = model.score(pred_train, tar_train)
    rsquared_test = model.score(pred_test, tar_test)
    print('\nR-square for training data:', rsquared_train)
    print('R-square for test data:', rsquared_test)
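A hedged invocation sketch, assuming the module-level imports used above (pandas, numpy, matplotlib, sklearn) and a local copy of the UCI wine-quality CSV; the file path and the space-to-underscore column renaming are assumptions:

import pandas as pd

wine_set = pd.read_csv("winequality-red.csv", sep=";")  # hypothetical local path
# the code above expects underscores where the UCI header uses spaces
wine_set.columns = [c.replace(" ", "_") for c in wine_set.columns]
lasso_regr(wine_set)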