def AB(k_model, clustersDict):
    '''
    Run a control-vs-trial (A/B) comparison for every cluster.

    For each cluster and each time-of-use window, consumption is summed per
    user over the window, the control and trial distributions are drawn as
    kernel density estimates on one figure, and a summary line is printed
    with the p-value of ``ttest_ind(control, trial, equal_var=False)``, the
    value returned by ``stat_power(control, trial)`` and the relative change
    of the trial mean with respect to the control mean.

    Parameters
    ----------
    k_model : sklearn.KMEANS
        Trained Kmeans model. Not used by this function; kept so that
        existing callers keep working.
    clustersDict : dict
        Dictionary containing DataFrames for all clusters. Keys must be
        numeric (they are printed as ``cluster + 1``). Every DataFrame needs
        a ``Residential_Tariff`` column; its last three columns are dropped
        before the analysis and the remaining columns are addressed by
        position (presumably one column per hour of the day -- confirm
        against the caller).

    Returns
    -------
    None
        Figures are displayed with ``plt.show()`` and the statistics are
        printed to standard output.
    '''
    tariffs = ['E', 'A', 'B', 'C', 'D']
    # Positional [first, last] row bounds of each time-of-use window.
    timeofuse = {'day': [8, 17], 'peak': [17, 19], 'night': [0, 8], 'day2': [19, 24]}

    for cluster in clustersDict:
        df = clustersDict[cluster]
        # ``.loc`` replaces the removed ``.ix`` indexer. The explicit copy
        # makes sure the relabelling below works on private data instead of
        # a slice of the caller's frame (no SettingWithCopyWarning).
        df = df.loc[df.Residential_Tariff.isin(tariffs)].copy()
        # Tariff 'E' is the control group; every other kept tariff is a trial.
        df['Residential_Tariff'] = df.Residential_Tariff.apply(
            lambda tariff: 'Control' if tariff == 'E' else 'Trial')

        # Drop the last three columns and transpose so that rows are the
        # remaining columns (time slots) and columns are users.
        control_slots = df.loc[df.Residential_Tariff == 'Control'].iloc[:, :-3].T
        trial_slots = df.loc[df.Residential_Tariff == 'Trial'].iloc[:, :-3].T

        for period in timeofuse:
            first, last = timeofuse[period]
            # NOTE(review): the end bound is inclusive (``last + 1``), so
            # adjacent windows share their boundary row (e.g. row 17 is in
            # both 'day' and 'peak'). Left as in the original -- confirm
            # whether this overlap is intended.
            window = slice(first, last + 1)
            control = control_slots.iloc[window, :].sum()
            trial = trial_slots.iloc[window, :].sum()

            fig = plt.figure()
            ax_ = fig.add_subplot(1, 1, 1)
            control.plot(kind='kde', ax=ax_, alpha=0.5)
            trial.plot(kind='kde', ax=ax_, alpha=0.5)
            ax_.set_title('Cluster %d: %s' % (cluster + 1, period))
            ax_.set_xlim((1, 5))
            ax_.set_ylim([0, 0.6])
            ax_.set_xlabel('Consumption (kWh)')
            plt.show()

            p_value = ttest_ind(control, trial, equal_var=False)[1]
            power = stat_power(control, trial)
            magnitude = np.mean(trial) / np.mean(control) - 1
            # Single pre-formatted argument: prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print('Cluster %d, %s p-value: %s power:  %s magnitude:  %s'
                  % (cluster + 1, period, p_value, power, magnitude))
metrics.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录