def plot_ks_cdf(y_true,y_score,pos_label=1,label_map=None,color_map=None,decimals=0,
                xlabel='Score',ylabel='CumSum',fontsize=12,figsize=(18,8),close=True):
    """
    Plot the cumulative score distribution of each class and report the KS value.

    The scores are rounded to ``decimals`` places and cross-tabulated against
    the labels; each class column is then turned into a cumulative share
    (running count divided by the class total) and drawn as one line.

    Parameters:
        y_true: one-dimensional array-like or Series of class labels
            (the original notes mention {0,1} or {-1,1} encodings).
        y_score: one-dimensional array-like or Series of scores.
        pos_label: int, label treated as the positive class; forwarded to
            ``cal_ks``.
        label_map: dict mapping label values to legend names.
            Defaults to {0:'Good',1:'Bad'}.
        color_map: dict mapping label values to line colours.
            Defaults to {0:'g',1:'r'}. Labels missing from the mapping use
            matplotlib's default cycle colours ('C0', 'C1', ...).
        decimals: number of decimal places used to round the scores; also
            forwarded to ``cal_ks`` and used as the precision of the split
            value shown in the title.
        xlabel: x-axis label.
        ylabel: y-axis label.
        fontsize: int, font size for tick labels, axis labels and title.
        figsize: (width, height) of the figure, passed to ``plt.figure``.
        close: when true, close the created figure before returning. The
            figure object is still returned in the result.

    Returns:
        dict with keys 'ks' (value returned by ``cal_ks`` with
        ``return_split=False``), 'split' (value returned by ``cal_ks`` with
        ``return_split=True``) and 'fig' (the matplotlib figure).
    """
    if label_map is None:
        label_map = {0: 'Good', 1: 'Bad'}
    if color_map is None:
        # Without this default, color_map.get below raised AttributeError
        # whenever the caller relied on the documented default.
        color_map = {0: 'g', 1: 'r'}

    y_true = pd.Series(y_true)
    y_score = pd.Series(y_score)
    y_score_dataframe = pd.concat([y_true, y_score], axis=1)

    # cal_ks is defined outside this block and is called exactly as before.
    # NOTE(review): it receives the combined label/score frame as its score
    # argument - confirm that is what cal_ks expects.
    ks = cal_ks(y_true, y_score_dataframe, pos_label=pos_label,
                return_split=False, decimals=decimals)
    score_split = cal_ks(y_true, y_score_dataframe, pos_label=pos_label,
                         return_split=True, decimals=decimals)

    # Rows: rounded score values; columns: class labels.
    crossfreq = pd.crosstab(y_score.round(decimals), y_true)
    # Running count per class divided by that class's total.
    crossdens = crossfreq.cumsum(axis=0) / crossfreq.sum()

    # Resolve colours before the columns are renamed to their display names.
    # Unmapped labels get a 'CN' cycle colour rather than None, which is not
    # accepted as a colour by every pandas version.
    colors = [color_map.get(label, 'C%d' % (position % 10))
              for position, label in enumerate(crossdens.columns)]

    crossdens = crossdens.rename(columns=label_map)
    crossdens.columns.name = ''

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    crossdens.plot(kind='line', ax=ax, fontsize=fontsize, color=colors)
    ax.set_xlabel(xlabel, fontsize=fontsize)
    ax.set_ylabel(ylabel, fontsize=fontsize)
    ax.set_title('CDF Curve (KS=%.2f, SPLIT=%.*f)'%(ks,decimals,score_split),fontsize=fontsize)

    if close:
        # Close only the figure created here; closing 'all' would also
        # destroy unrelated figures the caller still has open.
        plt.close(fig)

    return {'ks': ks, 'split': score_split, 'fig': fig}
# Scraped web-page residue, not part of the code: "Comment list" / "Table of contents"