def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
    '''
    Draw a five-panel diagnostic figure for a time series.

    The panels, laid out on a 3x2 grid, are: the series itself (full-width
    top row), its ACF and PACF (middle row), and a QQ plot plus a
    probability plot (bottom row).

    y: the series to plot; anything that is not a pd.Series is wrapped in one.
    lags: forwarded unchanged to plot_acf / plot_pacf.
    figsize: size passed to plt.figure.
    style: matplotlib style applied only while the figure is being built.
    returns: None (the figure is left as pyplot's current figure).
    '''
    series = y if isinstance(y, pd.Series) else pd.Series(y)

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        grid = (3, 2)

        # Axes are created in the same order as the panels are described above.
        series_axis = plt.subplot2grid(grid, (0, 0), colspan=2)
        acf_axis = plt.subplot2grid(grid, (1, 0))
        pacf_axis = plt.subplot2grid(grid, (1, 1))
        qq_axis = plt.subplot2grid(grid, (2, 0))
        prob_axis = plt.subplot2grid(grid, (2, 1))

        series.plot(ax=series_axis)
        series_axis.set_title('Time Series Analysis Plots')

        smt.graphics.plot_acf(series, lags=lags, ax=acf_axis, alpha=0.5)
        smt.graphics.plot_pacf(series, lags=lags, ax=pacf_axis, alpha=0.5)

        sm.qqplot(series, line='s', ax=qq_axis)
        qq_axis.set_title('QQ Plot')

        # The sample mean and standard deviation are handed to probplot
        # as its distribution shape parameters.
        dist_params = (series.mean(), series.std())
        scs.probplot(series, sparams=dist_params, plot=prob_axis)

        plt.tight_layout()
    return None
python类qqplot()的实例源码
forecast.py 文件源码
项目:software-suite-movie-market-analysis
作者: 93lorenzo
项目源码
文件源码
阅读 39
收藏 0
点赞 0
评论 0
def plot_resid(model, x):
    '''
    Plot residual diagnostics for a fitted StatsModels linear regression.

    The left panel is a scatter of studentized residuals against the
    model's predictions, with a dashed horizontal line at zero; the right
    panel is a qqplot of the same residuals.

    model: a trained StatsModel linear regression model.
    x: the input data which was used to train the model.
    returns: the figure upon which the residuals were drawn
    '''
    fig, (scatter_ax, qq_ax) = plt.subplots(1, 2)

    predictions = model.predict(x)
    student_resid = model.outlier_test()['student_resid']

    # Left panel: residuals vs. predicted values.
    scatter_ax.scatter(predictions, student_resid, alpha=.2)
    scatter_ax.axhline(0, linestyle='--')

    # Right panel: qqplot of the residuals.
    sm.qqplot(student_resid, line='s', ax=qq_ax)

    fig.tight_layout()
    return fig
def cross_section_qqplot(data, factor_name, date):
    '''
    Draw a QQ plot of one factor's cross-section on a single date.

    The plot is drawn on the current pyplot axes and shown immediately.
    --------------------------------
    data: DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name: str, name of the factor column to plot
    date: str, the date (first index level) whose cross-section is plotted
    returns: the axes the QQ plot was drawn on
    '''
    ax = plt.gca()
    # `.ix` was removed in pandas 1.0; `.loc` with a one-element tuple does
    # the same label-based partial selection on the first index level.
    plot_data = data.loc[(date,), factor_name].values
    sm.qqplot(plot_data, line='45', fit=True, ax=ax)
    plt.show()
    return ax
# ??4
# ic ???
regression_modeling.py 文件源码
项目:-Python-Analysis_of_wine_quality
作者: ekolik
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def mult_regression(wine_set):
    '''
    Fit an OLS model of wine quality on centered predictors and show
    diagnostic plots.

    Prints the regression summary, then shows in turn: a q-q plot of the
    residuals, a plot of the standardized (Pearson) residuals by
    observation, and an influence (leverage) plot.

    wine_set: DataFrame with a 'quality' column plus the predictor columns
        named in the formulas below. The caller's frame is not modified;
        centering produces a new frame.
    returns: None
    '''
    # Center the quantitative IVs for regression analysis; the response
    # ('quality') is put back uncentered.
    quality = wine_set['quality']
    wine_set = wine_set - wine_set.mean()
    wine_set['quality'] = quality

    print("OLS multivariate regression model")

    # The model was first run with all columns; the most significant ones
    # were then chosen for each wine set. The row count is what tells the
    # two data sets apart here.
    if len(wine_set) < 2000:
        # for red
        formula = "quality ~ volatile_acidity + chlorides + pH + sulphates + alcohol"
    else:
        # for white
        formula = "quality ~ volatile_acidity + density + pH + sulphates + alcohol"

    results1 = smf.ols(formula=formula, data=wine_set).fit()
    print(results1.summary())

    # q-q plot for normality
    sm.qqplot(results1.resid, line='r')
    plt.show()

    # plot of residuals
    stdres = pd.DataFrame(results1.resid_pearson)
    plt.plot(stdres, 'o', ls='None')
    plt.axhline(y=0, color='r')
    plt.ylabel('Standardized residual')
    plt.xlabel('Observation number')
    plt.show()

    # # diagnostic plots
    # figure1 = plt.figure(figsize=(12, 8))
    # figure1 = sm.graphics.plot_regress_exog(results1, "alcohol", fig = figure1)
    # plt.show()
    #
    # figure1 = plt.figure(figsize=(12, 8))
    # figure1 = sm.graphics.plot_regress_exog(results1, "sulphates", fig = figure1)
    # plt.show()

    # leverage plot
    sm.graphics.influence_plot(results1, size=8)
    plt.show()
# call(mult_regression)
# ____________________________ Logistic Regression _____________________