def data_pre_process(train_path, test_path, label, drop_list=None):
    """Load the train/test CSV files and wrap each one in an ``xgb.DMatrix``.

    Both files go through the same steps: read the CSV, drop the columns
    named in ``drop_list`` (if any), cast the ``label`` column to ``int`` to
    use as the target, and treat every remaining column as a feature.

    Args:
        train_path: Path of the training CSV, passed to ``pandas.read_csv``.
        test_path: Path of the test CSV, passed to ``pandas.read_csv``.
        label: Name of the target column; it is looked up in both files.
        drop_list: Optional column label(s) removed from both files before
            the label/feature split. Falsy values (``None``, ``[]``) skip
            the drop.

    Returns:
        A ``(dtrain, dtest)`` tuple of ``xgb.DMatrix`` objects.
    """
    # The training file is fully processed before the test file is read,
    # so the dtype printouts appear in train-then-test order.
    X_train, y_train = _split_features_and_label(train_path, label, drop_list)
    X_test, y_test = _split_features_and_label(test_path, label, drop_list)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)
    return dtrain, dtest


def _split_features_and_label(csv_path, label, drop_list=None):
    """Read one CSV and return ``(features, target)`` for the given label."""
    dataset = pandas.read_csv(csv_path)
    if drop_list:
        dataset = dataset.drop(drop_list, axis=1)
    target = dataset[label].astype(int)
    # Single-argument call form prints identically on Python 2 and Python 3,
    # unlike the bare ``print x`` statement, which is a SyntaxError on 3.
    print(target.dtypes)
    features = dataset.drop(label, axis=1)
    return features, target
util_xgb.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录