def IV_calc(data, var, n_bins=10, target="class"):
    """Build the per-group Information Value (IV) table for one variable.

    Columns with object or string dtype are grouped by their own values.
    Every other column is ranked (ties broken by order of appearance) and
    cut into ``n_bins`` equal-frequency bins, which are then used as groups.

    The sum of ``target`` within a group is reported as the "bad" count, so
    ``target`` is presumably a 0/1 indicator with 1 meaning "bad" -- confirm
    against the caller's data.

    Args:
        data: DataFrame holding both ``var`` and ``target``. It is not
            modified; the bins are kept in a temporary Series.
        var: Name of the column to assess.
        n_bins: Number of quantile bins for non-string columns.
        target: Name of the outcome column.

    Returns:
        DataFrame indexed by category (index named ``var``) or by bin
        interval (index named ``"bin_var"``) with the columns ``Total``,
        ``bad``, ``good``, ``bad_per``, ``good_per`` and ``I_V``.

    Note:
        A group with zero good rows or zero bad rows yields ``inf`` (or
        ``NaN`` when the group is empty) in ``I_V``, because the ratio
        inside the logarithm is then 0, infinite or undefined. The values
        are returned as computed; callers decide how to treat them.
    """
    column = data[var]

    # Checking only ``dtype == "object"`` misses pandas string dtypes, which
    # would then be decile-binned as if they were numeric.
    is_text = (pd.api.types.is_object_dtype(column)
               or pd.api.types.is_string_dtype(column))

    if is_text:
        group_key = column
    else:
        # Ranking first guarantees unique values, so qcut never fails on
        # duplicate bin edges. The result is named "bin_var" so the index
        # of the returned table carries that name.
        ranked = column.rank(method="first")
        group_key = pd.qcut(ranked, n_bins).rename("bin_var")

    # observed=False keeps empty bins in the table; it has no effect on a
    # non-categorical key.
    dataf = data[target].groupby(group_key, observed=False).agg(["count", "sum"])
    dataf.columns = ["Total", "bad"]
    dataf["good"] = dataf["Total"] - dataf["bad"]
    dataf["bad_per"] = dataf["bad"] / dataf["bad"].sum()
    dataf["good_per"] = dataf["good"] / dataf["good"].sum()
    dataf["I_V"] = ((dataf["good_per"] - dataf["bad_per"])
                    * np.log(dataf["good_per"] / dataf["bad_per"]))
    return dataf
Chapter 03_Logistic Regression vs Random Forest.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录