def run(result_csv_path):
    """Train a two-layer random-forest pipeline and save test predictions.

    Layer 1 is a ``RandomForestRegressor`` tuned with ``GridSearchCV``. Its
    predictions on the training and test features are handed to
    ``feature_engineer`` to build the inputs for layer 2, a second random
    forest with fixed hyper-parameters. Both fitted models are dumped under
    ``weights/`` and the layer-2 test predictions are passed to
    ``save_results``.

    Relies on ``train_csv_path``, ``test_csv_path`` and ``Model_Name``, which
    are not defined inside this function (presumably module-level globals).

    Args:
        result_csv_path: Destination forwarded unchanged to ``save_results``.
    """
    import os

    # Create the output directory before any training starts: the grid search
    # below is expensive, and joblib.dump would otherwise fail only after it
    # has finished if 'weights/' does not exist.
    os.makedirs('weights', exist_ok=True)

    # The boolean flag apparently selects whether labels are returned along
    # with the features -- confirm against load_data.
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')

    # Search space for layer 1: 6 depths x 3 feature fractions x 3 leaf sizes
    # = 54 candidate settings.
    layer1_rf_paramters = {
        'max_depth': range(15, 21),
        'max_features': [0.5, 0.6, 0.8],
        'min_samples_leaf': [1, 3, 10],
    }

    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        n_jobs=-1,
    )
    layer1_gs_rf = GridSearchCV(layer1_rf, param_grid=layer1_rf_paramters)
    layer1_gs_rf.fit(train_x, train_y)

    # Persist the fitted search object. To skip retraining on a later run it
    # can be restored with joblib.load from the same path.
    joblib.dump(layer1_gs_rf, 'weights/layer1_' + Model_Name + '.m')

    # Feed layer-1 predictions into feature_engineer to build layer-2 inputs.
    # NOTE(review): tr_pred is predicted on the same rows layer 1 was fitted
    # on, while te_pred is out-of-sample -- confirm this asymmetry is intended.
    tr_pred = layer1_gs_rf.predict(train_x)
    train_x = feature_engineer(layer1_gs_rf, train_x, tr_pred)
    te_pred = layer1_gs_rf.predict(test_x)
    test_x = feature_engineer(layer1_gs_rf, test_x, te_pred)

    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=1000,
        max_features='sqrt',
        max_depth=18,
        bootstrap=False,
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')

    # Final predictions come from layer 2 on the engineered test features.
    y_pred = layer2_rf.predict(test_x)
    save_results(result_csv_path, y_pred)
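# (web-page residue, not part of the program) Comment list
# (web-page residue, not part of the program) Table of contents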