def xgb2(train2, y, test2, v, z):
    """Train seed-averaged, stratified K-fold XGBoost models.

    For each of 4 seeds, the training data is split into 9 stratified,
    shuffled folds. A binary-logistic booster is trained on each fold's
    training part with early stopping on the held-out part. Held-out
    predictions are accumulated into ``v`` and test predictions into ``z``,
    both under a column named after this function (``"xgb2"``).

    Args:
        train2: Training features; must support positional row selection via
            ``.iloc`` (e.g. a pandas DataFrame).
        y: Binary targets, indexed with the integer arrays produced by
            ``StratifiedKFold.split``.
            NOTE(review): assumes positional and label indexing coincide
            (NumPy array or default integer index) -- confirm with caller.
        test2: Test features, passed to ``xgb.DMatrix``.
        v: Container receiving out-of-fold predictions; must support column
            assignment and ``.loc[rows, col]``.
            NOTE(review): ``.loc`` is given positional fold indices, so this
            assumes a default 0..n-1 index -- confirm with caller.
        z: Container receiving the averaged test predictions.

    Returns:
        None. ``v`` and ``z`` are modified in place; per-fold log-loss values
        and their mean/std are printed.
    """
    # Use this function's own name as the output column name.
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    n_splits = 9
    n_seeds = 4
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True)
    xgb_params = dict(
        max_depth=4,
        learning_rate=0.02,
        subsample=0.7,
        alpha=0.015,
        # colsample_bytree=0.8,
        objective='binary:logistic',
        eval_metric='logloss',
        seed=1,  # overwritten per seed iteration below
        silent=1,
    )
    dtest = xgb.DMatrix(test2)
    for s in range(n_seeds):
        xgb_params['seed'] = s + 4242
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            # skf.split yields positional indices, so select rows by position.
            # (.ix was removed in pandas 1.0; .iloc is the positional indexer.)
            dtrain = xgb.DMatrix(train2.iloc[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.iloc[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            # Early stopping monitors the last entry of the watch list.
            # NOTE(review): whether predict() below uses the best round or
            # the last round depends on the xgboost version -- confirm.
            clf = xgb.train(xgb_params, dtrain, 10000, watch,
                            early_stopping_rounds=100, verbose_eval=False)
            p = clf.predict(dvalid)
            # Each row is held out once per seed, so v sums n_seeds values.
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            # Every fold model also predicts the full test set.
            z[cname] += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)
    # Average the accumulated sums: test over all models, validation per seed.
    z[cname] /= n_splits * n_seeds
    v[cname] /= n_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())
# Comment list
# Table of contents