import numpy as np
import xgboost as xgb

# print_mape is assumed to be a project helper that prints the MAPE of
# predictions against the true values; a minimal sketch is given after
# the function.


def xgboost(train_sample, validation_sample, features, model_param):
    def evalmape(preds, dtrain):
        # Custom eval: predictions are in log space, so transform them
        # back to the original scale before computing MAPE.
        labels = dtrain.get_label()
        preds = np.power(log_base, preds) - 1
        # return a pair (metric_name, result)
        return 'mape', np.abs((labels - preds) / labels).sum() / len(labels)

    param = {
        'max_depth': model_param['depth'],
        'eta': model_param['lr'],
        'silent': 1,                # deprecated in newer xgboost; use 'verbosity'
        'objective': 'reg:linear',  # renamed 'reg:squarederror' in newer xgboost
        'booster': 'gbtree',
        'subsample': model_param['sample'],
        'seed': model_param['seed'],
        'colsample_bytree': 1,
        'min_child_weight': 1,
        'gamma': 0,
    }
    param['eval_metric'] = 'mae'
    num_round = model_param['tree']
    log_base = np.e
    # Train on the log-transformed target: log_base(1 + volume).
    dtrain = xgb.DMatrix(train_sample[features],
                         np.log1p(train_sample['volume']) / np.log(log_base))
    # Validation labels stay on the original scale so evalmape yields the true
    # MAPE there; the train-side numbers in the watchlist mix log-space labels
    # with back-transformed predictions and are only indicative.
    dtest = xgb.DMatrix(validation_sample[features], validation_sample['volume'])
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    # Note: feval is deprecated in recent xgboost releases in favor of custom_metric.
    bst = xgb.train(param, dtrain, num_round, watchlist, feval=evalmape)
    # Invert the log transform to get predictions on the original scale.
    xgboost_prob = np.power(log_base, bst.predict(dtest)) - 1
    # MAPE
    print_mape(validation_sample['volume'], xgboost_prob, 'XGBOOST')
    return xgboost_prob
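
A minimal usage sketch, not the author's original setup: the feature columns ('f0', 'f1'), the synthetic data, the parameter values, and the print_mape helper below are all assumptions for illustration.

import numpy as np
import pandas as pd

def print_mape(y_true, y_pred, name):
    # Hypothetical stand-in for the project's print_mape helper.
    mape = np.abs((y_true - y_pred) / y_true).mean()
    print('%s MAPE: %.4f' % (name, mape))

rng = np.random.RandomState(0)
df = pd.DataFrame({'f0': rng.rand(200), 'f1': rng.rand(200)})
df['volume'] = 1 + 50 * df['f0'] + 20 * df['f1']   # strictly positive target
train, valid = df.iloc[:150], df.iloc[150:]

# The model_param keys match those read inside xgboost() above; values are illustrative.
model_param = {'depth': 4, 'lr': 0.1, 'sample': 0.8, 'seed': 42, 'tree': 50}
preds = xgboost(train, valid, ['f0', 'f1'], model_param)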