def rf1(train2, y, test2, v, z):
    """Cross-validate a shallow random-forest regressor and store its predictions.

    A column named after this function is (re)created in both ``v`` and ``z``:

    * ``v[<name>]`` receives the out-of-fold predictions for the training rows.
    * ``z[<name>]`` receives the mean over all folds of ``log1p`` of the
      test-set predictions.

    Per-fold progress and log-loss scores are printed, followed by the
    log-loss of the full out-of-fold column and the mean/std of the fold
    scores. Nothing is returned; ``v`` and ``z`` are modified in place.

    Args:
        train2: Training features, indexed with the integer fold index arrays
            (``train2[itrain]``) -- presumably a NumPy array; confirm.
        y: Training targets, indexed the same way and passed to
            ``metrics.log_loss`` as the true labels.
        test2: Test features handed to ``predict`` in every fold.
        v: Table of out-of-fold predictions, written through ``.loc`` with the
            fold's integer indices -- assumes a pandas DataFrame whose index
            labels equal the row positions; confirm against the caller.
        z: Table of test-set predictions, one row per row of ``test2``.
    """
    # The column name is taken from the running function's own name.
    cname = sys._getframe().f_code.co_name

    # Start from float zeros: the fold predictions are floats, and writing
    # floats into a slice of an integer column via .loc is a deprecated
    # silent upcast in recent pandas versions.
    v[cname], z[cname] = 0.0, 0.0

    n_splits = 300
    scores = []
    # NOTE(review): shuffle=True without random_state, so folds differ per run.
    skf = model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True)
    for n, (itrain, ival) in enumerate(skf.split(train2, y)):
        print('step %d of %d'%(n+1, skf.n_splits), now())
        clf = ensemble.RandomForestRegressor(n_estimators=1000,
                                             max_depth=3,
                                             random_state=13)
        clf.fit(train2[itrain], y[itrain])

        # Each training row is in exactly one validation fold, so adding onto
        # the zero-initialised column simply stores its out-of-fold prediction.
        p = clf.predict(train2[ival])
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)

        # Accumulate the test predictions; divided by the fold count below.
        # NOTE(review): test predictions go through log1p while the
        # out-of-fold predictions in ``v`` are stored raw -- confirm that the
        # consumer of ``z`` expects this scale.
        z[cname] += np.log1p(clf.predict(test2))

        print(cname, 'step %d: score'%(n+1), score, now())
        scores.append(score)

    print('validation loss: ', metrics.log_loss(y, v[cname]))
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())

    # Turn the per-fold sum of test predictions into a mean.
    z[cname] /= n_splits
评论列表
文章目录