def GDBT_regression(X=train_df_munged,Y=label_df['SalePrice']):
    """Fit a gradient-boosting regressor and plot its fit on a held-out split.

    The data is split 70/30 (``random_state=0``); the model is fitted on the
    70% part and evaluated on the remaining 30%. Two figures are shown (a
    residual plot and a predicted-vs-real plot), and the value returned by
    ``rmse`` for the held-out part is printed.

    Note that the default arguments are bound to the module-level
    ``train_df_munged`` and ``label_df['SalePrice']`` objects at the time
    this function is defined.

    Args:
        X: Feature matrix passed to ``train_test_split``.
        Y: Target values aligned with ``X``.

    Returns:
        The fitted ``GradientBoostingRegressor``.
    """
    est = GradientBoostingRegressor(n_estimators=50, max_depth=3, learning_rate=0.1)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0
    )
    est.fit(X_train, Y_train)

    # Predictions are made on the held-out split, not on the training data.
    y_test_pred = est.predict(X_test)

    # Plot residuals (prediction minus real value) against the prediction.
    plt.scatter(
        y_test_pred,
        y_test_pred - Y_test,
        c='blue',
        marker='s',
        label='error on test data',
    )
    plt.title("Linear regression with GDBT")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    # Fixed plotting window.
    # NOTE(review): 10.5-13.5 presumably matches a log-scaled SalePrice - confirm.
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Plot predictions: predicted values on x and real values on y, so the
    # data agrees with the axis labels below.
    plt.scatter(y_test_pred, Y_test, c="blue", marker="s", label="Test data")
    plt.title("Linear regression with GDBT")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    # Reference diagonal: points on this line are predicted exactly.
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    print('rmse value:', rmse(Y_test, y_test_pred))
    return est
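# 评论列表 (comment list) - leftover web-page navigation text, not code
# 文章目录 (table of contents) - leftover web-page navigation text, not code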