def mean_target_rate(name, out, idcol, ycol):
    """Return the mean of ``ycol`` for every ``"<column>-<value>"`` pair in a CSV.

    The result is cached: if the file ``out`` already exists it is unpickled
    and returned without reading ``name`` at all; otherwise the CSV is
    scanned once, the means are computed, pickled to ``out`` and returned.

    Args:
        name: Path of the CSV file to read (must have a header row).
        out: Path of the pickle cache file to load from / write to.
        idcol: Header name of the identifier column, excluded from the stats.
        ycol: Header name of the target column; each value is parsed with
            ``float`` and is itself excluded from the feature keys.

    Returns:
        A ``defaultdict(float)`` mapping ``"<column>-<value>"`` to the average
        of ``ycol`` over all rows where that column held that value. Because
        it is a defaultdict, looking up an unseen key yields ``0.0``.

    Raises:
        KeyError: If a row has no ``ycol`` field.
        ValueError: If a ``ycol`` value cannot be parsed as a float.
    """
    # NOTE(security): pickle.load can execute arbitrary code; ``out`` must
    # point to a file this program wrote itself, never to untrusted data.
    if os.path.exists(out):
        with open(out, 'rb') as cached:
            return pickle.load(cached)

    skipped = (idcol, ycol)  # hoisted: columns that are not features
    yc, cc = defaultdict(float), defaultdict(float)  # per-key target sum / row count

    # newline='' lets the csv module handle embedded newlines in quoted fields.
    with open(name, newline='') as src:
        for c, row in enumerate(csv.DictReader(src)):
            y = float(row[ycol])
            for col, val in row.items():
                if col in skipped:
                    continue
                v = "%s-%s" % (col, val)
                yc[v] += y
                cc[v] += 1.0
            # Progress report every 100000 rows (skipping row 0).
            if c > 0 and c % 100000 == 0:
                print("rows %d len_cc %d" % (c, len(cc)))

    # Turn the accumulated sums into means in place; every key in yc was
    # counted at least once in cc, so the division cannot hit zero.
    for key in yc:
        yc[key] = yc[key] / cc[key]

    # Close the handle explicitly so the cache is fully flushed before return.
    with open(out, 'wb') as sink:
        pickle.dump(yc, sink)
    return yc
# (Web-page residue, not part of the code: "Comment list" / "Article table of contents")