def cal_ks(y, y_prob, pos_label=1, return_split=False, decimals=0):
    """Compute the Kolmogorov-Smirnov (KS) statistic of a binary scorer.

    The KS value is the two-sample KS statistic between the scores of the
    positive class (``y == pos_label``) and the scores of every other sample.

    Args:
        y: True labels (list, array or Series), e.g. {0, 1} or {-1, 1}.
            Only the values are used; any index is discarded.
        y_prob: Predicted scores. One-dimensional input (list, array,
            Series, or a single-column 2-D structure) is used as is. For
            2-D input with several columns the second column is used.
        pos_label: Label value that marks the positive class.
        return_split: If False (default) return the KS statistic. If True
            return the rounded score at which the gap between the two
            cumulative class distributions is largest.
        decimals: Number of decimals scores are rounded to before the split
            point is searched. Only used when ``return_split`` is True. With
            the default of 0 scores are rounded to whole numbers, so pass a
            larger value when the scores are probabilities in [0, 1].

    Returns:
        The KS statistic, or the best split score when ``return_split`` is
        True.

    Raises:
        ValueError: If either the positive or the non-positive group is empty.
    """
    labels = pd.Series(pd.Series(y).values)

    # Coerce to a frame first so lists, arrays, Series and DataFrames are all
    # handled uniformly; a single column is treated as the score itself.
    score_frame = pd.DataFrame(y_prob)
    score_col = 0 if score_frame.shape[1] == 1 else 1
    y_pred = pd.Series(score_frame.iloc[:, score_col].values)

    is_pos = labels == pos_label
    pos_scores = y_pred[is_pos]
    neg_scores = y_pred[~is_pos]
    if pos_scores.empty or neg_scores.empty:
        raise ValueError(
            "cal_ks needs at least one sample with y == pos_label "
            "and at least one sample with y != pos_label"
        )

    ks, _ = stats.ks_2samp(pos_scores.values, neg_scores.values)
    if not return_split:
        return ks

    # Group by "is positive" rather than by the raw label values so the
    # lookup below works for any label encoding ({0, 1}, {-1, 1}, ...).
    group = is_pos.map({True: "pos", False: "neg"})
    crossfreq = pd.crosstab(y_pred.round(decimals), group)
    # Per-class cumulative distribution over the sorted rounded scores.
    crossdens = crossfreq.cumsum(axis=0) / crossfreq.sum()
    gap = (crossdens["pos"] - crossdens["neg"]).abs()
    # idxmax returns the first (lowest) score that reaches the maximum gap.
    return gap.idxmax()
# (Residue from the source web page: "Comment list" / "Table of contents".)