def _abort_if_nans(frame, label, show_rows):
    """Terminate the program when *frame* contains missing values.

    Does nothing when *frame* has no NaNs. Otherwise prints the total NaN
    count, the columns holding NaNs and (only when *show_rows* is true) the
    rows holding NaNs, then a message naming *label*, and raises
    ``SystemExit`` with status 1.
    """
    null_count = frame.isnull().sum().sum()
    if null_count <= 0:
        return

    print('Nans:', null_count)
    cols = frame.isnull().any(axis=0)
    print(cols[cols])
    if show_rows:
        rows = frame.isnull().any(axis=1)
        print(rows[rows])
    print('NANs in {}, please check it!'.format(label))
    # Raise SystemExit directly instead of calling the site-provided exit():
    # that helper is meant for the interactive shell, may be missing
    # (python -S, frozen builds), and exits with status 0 when called bare,
    # which would make a failed data check look like success.
    raise SystemExit(1)


def read_test_train(train_size):
    """Load the train and test tables from HDF and build the feature list.

    Args:
        train_size: Fraction of the train table to keep. The first
            ``round((1 - train_size) * n_rows)`` rows are dropped and the
            remaining rows are returned. Presumably expected to lie in
            (0, 1]; the value is not validated here.

    Returns:
        A ``(train, test, features)`` tuple, where ``features`` is whatever
        ``get_features(train, test)`` returns.

    Raises:
        SystemExit: With status 1 if either table contains NaNs (after
            printing a report of where they are).
    """
    print("Load train.csv")
    train = pd.read_hdf("../modified_data/train_original.csv.hdf", 'table')
    _abort_if_nans(train, 'train', show_rows=True)

    # Drop the leading rows so only the requested share of train remains.
    split = round((1 - train_size) * len(train.index))
    train = train[split:]

    print("Load test.csv")
    test = pd.read_hdf("../modified_data/test.hdf", 'table')
    # Row-level detail is reported for train only; test reports columns only.
    _abort_if_nans(test, 'test', show_rows=False)

    features = get_features(train, test)
    return train, test, features
s11_run_xgboost_only_test.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录