def cross_validate_best_known():
    """
    Cross-validate the three regressors using the best-known hyperparameters.

    Reads the tractor training data from 'data/train.csv', cleans it with
    ``cln.clean_all``, separates the 'SalePrice' column from the remaining
    columns, and passes a RandomForest, a GradientBoost and an AdaBoost
    (backed by a DecisionTree) to ``validate.cross_v_scores`` so the scores
    can be reported.

    The hyperparameters below are the "best" found so far using a grid search.
    """
    features = cln.clean_all(pd.read_csv('data/train.csv'))
    # pop() removes the column from the frame in place, so `features` no
    # longer contains the target once it has been extracted.
    target = features.pop('SalePrice')

    forest = RandomForestRegressor(
        max_features=2,
        min_samples_split=4,
        n_estimators=50,
        min_samples_leaf=2,
    )
    gradient_boost = GradientBoostingRegressor(
        loss='quantile',
        learning_rate=0.0001,
        n_estimators=50,
        max_features='log2',
        min_samples_split=2,
        max_depth=1,
    )
    # Shallow, randomly split tree used as the weak learner for AdaBoost.
    weak_learner = DecisionTreeRegressor(
        max_features='sqrt',
        splitter='random',
        min_samples_split=4,
        max_depth=3,
    )
    ada_boost = AdaBoostRegressor(
        weak_learner,
        learning_rate=0.1,
        loss='square',
        n_estimators=1000,
    )

    candidates = [forest, gradient_boost, ada_boost]
    validate.cross_v_scores(candidates, features, target)
    # Scores noted alongside the original code (presumably from an earlier run):
    # RandomForestRegressor -- RMLSE: -0.596797712098, R2: 0.0272065373946
    # GradientBoostingRegressor -- RMLSE: -0.996134592541, R2: -2.37202164829
    # AdaBoostRegressor -- RMLSE: -0.706385708459, R2: -0.103966980393
model_comparison.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录