def lasso_regr(wine_set):
pred = wine_set[["density", 'alcohol', 'sulphates', 'pH', 'volatile_acidity', 'chlorides', 'fixed_acidity',
'citric_acid', 'residual_sugar', 'free_sulfur_dioxide', 'total_sulfur_dioxide']]
predictors = pred.copy()
targets = wine_set.quality
# standardize predictors to have mean=0 and sd=1
predictors = pd.DataFrame(preprocessing.scale(predictors))
predictors.columns = pred.columns
# print(predictors.head())
# split into training and testing sets
pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.3, random_state=123)
# specify the lasso regression model
model = LassoLarsCV(cv=10, precompute=False).fit(pred_train, tar_train)
print('Predictors and their regression coefficients:')
d = dict(zip(predictors.columns, model.coef_))
for k in d:
print(k, ':', d[k])
# plot coefficient progression
m_log_alphas = -np.log10(model.alphas_)
# ax = plt.gca()
plt.plot(m_log_alphas, model.coef_path_.T)
print('\nAlpha:', model.alpha_)
plt.axvline(-np.log10(model.alpha_), linestyle="dashed", color='k', label='alpha CV')
plt.ylabel("Regression coefficients")
plt.xlabel("-log(alpha)")
plt.title('Regression coefficients progression for Lasso paths')
plt.show()
# plot mean squared error for each fold
m_log_alphascv = -np.log10(model.cv_alphas_)
plt.plot(m_log_alphascv, model.cv_mse_path_, ':')
plt.plot(m_log_alphascv, model.cv_mse_path_.mean(axis=-1), 'k', label='Average across the folds', linewidth=2)
plt.legend()
plt.xlabel('-log(alpha)')
plt.ylabel('Mean squared error')
plt.title('Mean squared error on each fold')
plt.show()
# Mean squared error from training and test data
train_error = mean_squared_error(tar_train, model.predict(pred_train))
test_error = mean_squared_error(tar_test, model.predict(pred_test))
print('\nMean squared error for training data:', train_error)
print('Mean squared error for test data:', test_error)
rsquared_train = model.score(pred_train, tar_train)
rsquared_test = model.score(pred_test, tar_test)
print('\nR-square for training data:', rsquared_train)
print('R-square for test data:', rsquared_test)
#
# print('----------------Lasso Regression------------------------')
# call(lasso_regr)
# ______________________________K-Means Cluster Analysis_________________
machine_learning.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录