def mult_regression(wine_set):
    """Fit an OLS multiple regression of ``quality`` and show diagnostics.

    The numeric predictor columns are mean-centered (``quality`` itself is
    left on its original scale), a model is fitted with ``smf.ols``, its
    summary is printed, and three diagnostic figures are shown in turn:
    a Q-Q plot of the residuals, the Pearson residuals by observation
    number, and an influence (leverage) plot.

    Args:
        wine_set: pandas DataFrame holding a ``quality`` column plus the
            predictor columns named in the formulas below. The caller's
            frame is not modified.

    Raises:
        KeyError: if ``wine_set`` has no ``quality`` column.
    """
    # Frames with fewer rows than this get the "red" formula, larger ones
    # the "white" formula (row count is the only signal used to tell the
    # two wine sets apart).
    red_max_rows = 2000

    # Center the quantitative IVs for regression analysis. Only numeric
    # columns are centered: DataFrame.mean() raises on non-numeric columns
    # in pandas >= 2.0, and such columns are not used by either formula.
    quality = wine_set["quality"]
    predictors = wine_set.select_dtypes(include="number").drop(
        columns="quality", errors="ignore"
    )
    centered = predictors - predictors.mean()
    centered["quality"] = quality

    print("OLS multivariate regression model")

    # The model was first run with all columns; the most significant
    # predictors for each wine set were then kept and the model rerun.
    if len(centered) < red_max_rows:
        # for red
        formula = "quality ~ volatile_acidity + chlorides + pH + sulphates + alcohol"
    else:
        # for white
        formula = "quality ~ volatile_acidity + density + pH + sulphates + alcohol"

    results = smf.ols(formula=formula, data=centered).fit()
    print(results.summary())

    # Q-Q plot of the residuals to check normality.
    sm.qqplot(results.resid, line='r')
    plt.show()

    # Pearson (standardized) residuals against observation number, with a
    # horizontal reference line at zero.
    stdres = pd.DataFrame(results.resid_pearson)
    plt.plot(stdres, 'o', ls='None')
    plt.axhline(y=0, color='r')
    plt.ylabel('Standardized residual')
    plt.xlabel('Observation number')
    plt.show()

    # Per-predictor diagnostic plots are available via
    # sm.graphics.plot_regress_exog(results, "<column>", fig=...) if needed.

    # Leverage plot.
    sm.graphics.influence_plot(results, size=8)
    plt.show()
# call(mult_regression)
# ____________________________ Logistic Regression _____________________
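# NOTE: the following lines are web-page residue from the site this listing
# was copied from; kept as comments so the file remains valid Python.
# regression_modeling.py -- file source code
# python
# Views: 24
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Table of contents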