def rf1(train2, y, test2, v, z):
    """Fit random forests over shuffle-split folds and accumulate predictions.

    A column named after this function (``"rf1"``) is (re)initialised to zero
    in both ``v`` and ``z``.  For every seed and every split a
    ``RandomForestClassifier`` is fitted on the training part of the split.
    Its positive-class probabilities, passed through ``pconvert``, are added
    to ``v`` on the validation rows and to ``z`` for all of ``test2``.  The
    log-loss of each split is printed, followed by the array of all scores
    with its mean and standard deviation.

    Args:
        train2: Training features, indexed with the integer index arrays
            yielded by ``ShuffleSplit.split`` (presumably a NumPy array --
            confirm against the caller).
        y: Training targets, indexed the same way as ``train2``.
        test2: Test features handed to ``predict_proba``.
        v: Table that receives the validation predictions; mutated in place
            (assumed to be a pandas DataFrame because ``.loc`` is used).
        z: Table that receives the test predictions averaged over all
            fitted models; mutated in place.

    Returns:
        None.  All results are written into ``v`` and ``z``.
    """
    # Use the function's own name as the output column name.
    cname = sys._getframe().f_code.co_name

    # Start from float zeros: floats are added in place below, and relying on
    # pandas to upcast an integer column during ``.loc`` assignment is
    # deprecated in recent pandas versions.
    v[cname], z[cname] = 0.0, 0.0

    scores = []
    num_seeds = 1
    num_splits = 3
    base_seed = 13

    for seed in range(base_seed, base_seed + num_seeds):
        ss = model_selection.ShuffleSplit(n_splits=num_splits,
                                          random_state=seed)
        for n, (itrain, ival) in enumerate(ss.split(train2, y)):
            reg = ensemble.RandomForestClassifier(max_depth=9,
                                                  random_state=seed,
                                                  n_estimators=500,
                                                  n_jobs=-2)
            reg.fit(train2[itrain], y[itrain])

            # Probability of the second class (column 1) on held-out rows.
            p = reg.predict_proba(train2[ival])[:, 1]

            # NOTE(review): ``ival`` holds positional indices while ``.loc``
            # is label-based; this presumably relies on ``v`` having a
            # default 0..n-1 index -- confirm against the caller.
            v.loc[ival, cname] += pconvert(p)

            score = metrics.log_loss(y[ival], p)
            print(cname, 'seed %d step %d: ' % (seed, n + 1), score, now())
            scores.append(score)

            # Every fitted model contributes one full test-set prediction.
            z[cname] += pconvert(reg.predict_proba(test2)[:, 1])

    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())

    # One test prediction was added per (seed, split) pair.
    z[cname] /= num_splits * num_seeds

    # NOTE(review): ShuffleSplit draws each validation set independently, so
    # a row may be validated in several splits or in none; dividing by
    # ``num_seeds`` is an exact average only if every row is validated once
    # per seed (as with KFold).  Left unchanged to preserve existing results.
    v[cname] /= num_seeds
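# (web-page residue, not part of the program) Comment list
# (web-page residue, not part of the program) Article table of contents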