def train_xgboost():
    """Train an XGBoost regressor on per-sample mean feature vectors.

    Reads the label table from ``data/stage1_labels.csv``, builds one feature
    vector per row by loading ``npy_result/<id>.npy`` and averaging it along
    axis 0, holds out 20% of the rows (stratified on the ``cancer`` column)
    for validation, and fits an ``xgb.XGBRegressor`` with early stopping on
    the validation log-loss.

    Returns:
        The fitted ``xgb.XGBRegressor`` instance.

    Side effects:
        Prints the first rows of the label table to stdout, and XGBoost
        prints per-round evaluation output because ``verbose=True``.
    """
    df = pd.read_csv('data/stage1_labels.csv')
    print(df.head())

    # One feature vector per labelled sample: the stored array is collapsed
    # along its first axis.
    # NOTE(review): presumably each .npy file holds a stack of per-slice
    # feature rows -- confirm against the code that writes npy_result/.
    x = np.array([
        np.mean(np.load('npy_result/%s.npy' % str(sample_id)), axis=0)
        for sample_id in df['id'].tolist()
    ])

    # ``Series.as_matrix()`` was removed in pandas 1.0; ``.values`` yields the
    # same ndarray and is available on both old and new pandas versions.
    y = df['cancer'].values

    # Fixed random_state keeps the 80/20 stratified split reproducible.
    trn_x, val_x, trn_y, val_y = cross_validation.train_test_split(
        x, y, random_state=42, stratify=y, test_size=0.20)

    clf = xgb.XGBRegressor(max_depth=10,
                           n_estimators=1500,
                           min_child_weight=9,
                           learning_rate=0.05,
                           nthread=8,
                           subsample=0.80,
                           colsample_bytree=0.80,
                           seed=4242)

    # Training stops early once the validation log-loss has not improved for
    # 50 consecutive rounds, so n_estimators acts as an upper bound.
    # NOTE(review): newer xgboost releases expect eval_metric and
    # early_stopping_rounds on the constructor rather than fit() -- confirm
    # against the xgboost version this project pins before upgrading.
    clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=True,
            eval_metric='logloss', early_stopping_rounds=50)
    return clf
评论列表
文章目录