def run_test_with_model(
    train,
    test,
    features,
    target,
    random_state=0,
    test_size=0.02,
    model_path="models/model_0.968276662916_eta_0.2_md_5_test_size_0.02.bin",
):
    """Score a saved XGBoost model on a hold-out slice and predict the test set.

    No training happens here: a booster is loaded from ``model_path``,
    evaluated on the trailing ``test_size`` fraction of ``train``, and then
    used to predict ``test``.

    Args:
        train: Table-like object supporting ``len(train.index)``, row slicing
            (``train[a:b]``) and column selection (``train[features]``,
            ``train[target]``) -- presumably a pandas DataFrame; confirm
            against the caller.
        test: Table-like object supporting ``test[features]``.
        features: Column selector for the model inputs.
        target: Column selector for the ground-truth labels.
        random_state: Unused by this implementation (the split is a
            deterministic head/tail cut); kept so existing callers keep
            working.
        test_size: Fraction of ``train`` rows, taken from the end, used for
            validation. Must lie strictly between 0 and 1.
        model_path: Path of the saved booster file passed to
            ``xgb.Booster.load_model``.

    Returns:
        A ``(predictions, score)`` tuple: the test-set predictions as a plain
        list, and the value returned by ``roc_auc_score`` on the validation
        slice.

    Raises:
        ValueError: If ``test_size`` is outside ``(0, 1)`` or the resulting
            validation slice is empty.
    """
    if not 0 < test_size < 1:
        raise ValueError(
            'test_size must be strictly between 0 and 1, got {!r}'.format(test_size)
        )

    start_time = time.time()

    # Deterministic split: the leading rows are "train", the trailing rows are
    # the validation slice. int() keeps the bound usable as a slice index even
    # where round() yields a float.
    split = int(round((1 - test_size) * len(train.index)))
    X_train = train[0:split]
    X_valid = train[split:]
    print('Length train:', len(X_train.index))
    print('Length valid:', len(X_valid.index))

    if len(X_valid.index) == 0:
        # Fail early with a clear message instead of an opaque error from the
        # booster or the metric functions further down.
        raise ValueError(
            'Validation split is empty: {} training rows with test_size={!r}'.format(
                len(train.index), test_size
            )
        )

    # Load the previously saved booster instead of fitting a new one.
    gbm = xgb.Booster()
    gbm.load_model(model_path)

    print("Validating...")
    check = gbm.predict(xgb.DMatrix(X_valid[features]))
    y_valid = X_valid[target].values
    score = roc_auc_score(y_valid, check)
    # `auc` is resolved from the enclosing module; NOTE(review): presumably a
    # Kaggle-style AUC implementation -- confirm against its definition.
    score_kaggle = auc(y_valid, check)
    # The label says "error" but both values come from AUC-named scorers.
    print('Check error value: {:.6f} (Kaggle: {:.6f})'.format(score, score_kaggle))

    imp = get_importance(gbm, features)
    print('Importance array: ', imp)

    print("Predict test set...")
    test_prediction = gbm.predict(xgb.DMatrix(test[features]))

    # Despite the label, this is the elapsed time for load + validate + predict.
    print('Training time: {} minutes'.format(round((time.time() - start_time) / 60, 2)))
    return test_prediction.tolist(), score
评论列表
文章目录