def cross_validate(train):
    """Train and score one xgboost multi-class model per train/validation fold.

    ``train`` is handed to ``split_train_validation``, which is expected to
    return two parallel, integer-indexable collections of DataFrames
    (training folds and validation folds) -- NOTE(review): confirm against
    that helper, it is not defined in this block.

    For every fold a booster is fitted on the ``phone_brand``,
    ``device_model`` and ``timestamp`` columns with ``group`` as the label,
    and class probabilities are predicted for the validation fold.

    Returns:
        A ``(scores, preds)`` tuple: ``scores`` holds the ``log_loss`` of each
        fold, ``preds`` holds one DataFrame of predicted probabilities per
        fold, indexed like the validation fold and with
        ``target_encoder.classes_`` as columns.
    """
    # separate training and validation set
    X_train, X_valid = split_train_validation(train)

    feature_cols = ['phone_brand', 'device_model', 'timestamp']

    # Booster parameters only. The number of boosting rounds and the early
    # stopping patience are arguments of xgb.train itself; when they are put
    # in this dict they are not applied and training silently falls back to
    # the default number of rounds.
    parameters = {'max_depth': 4, 'eta': 0.1, 'silent': 1, 'subsample': 0.8,
                  'colsample_bytree': 0.8, 'objective': 'multi:softprob',
                  'booster': 'gbtree', 'num_class': 12,
                  'eval_metric': 'mlogloss'}
    num_boost_round = 1000
    early_stopping_rounds = 50

    scores = []
    preds = []
    # range() instead of xrange() so the loop runs on Python 2 and Python 3.
    for i in range(len(X_train)):
        fold_train = X_train[i]
        fold_valid = X_valid[i]

        # convert the fold's features/labels to xgboost matrices
        dtrain = xgb.DMatrix(fold_train[feature_cols],
                             label=fold_train['group'], missing=np.nan)
        dvalid = xgb.DMatrix(fold_valid[feature_cols],
                             label=fold_valid['group'], missing=np.nan)

        # Early stopping needs a watchlist; the validation fold is monitored.
        # verbose_eval=False keeps training quiet, as 'silent' intends.
        bst = xgb.train(parameters, dtrain,
                        num_boost_round=num_boost_round,
                        evals=[(dvalid, 'valid')],
                        early_stopping_rounds=early_stopping_rounds,
                        verbose_eval=False)

        # predict with xgboost
        # NOTE(review): whether predict() uses the best or the last iteration
        # after early stopping depends on the installed xgboost version.
        pred = bst.predict(dvalid)
        scores.append(log_loss(fold_valid['group'].tolist(), pred))
        pred = pd.DataFrame(pred, index=fold_valid.index,
                            columns=target_encoder.classes_)
        preds.append(pred)
    return scores, preds
评论列表
文章目录