def greedy_elim(df, n_features_to_select=150):
    """Select predictor columns for 'SalePrice' by recursive feature elimination.

    Every column of *df* except 'SalePrice' is treated as a candidate
    feature. An ``RFE`` selector wrapped around a
    ``GradientBoostingRegressor`` eliminates one feature per step until
    ``n_features_to_select`` remain.

    Args:
        df: DataFrame containing the candidate feature columns and a
            'SalePrice' target column.
        n_features_to_select: Number of features to keep. Defaults to 150,
            the value that was previously hard-coded.

    Returns:
        A NumPy array holding the names of the selected columns, in their
        original column order.
    """
    X = df.loc[:, df.columns != 'SalePrice']
    y = df['SalePrice']

    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)

    # NOTE: 150 features has worked best so far; the reason is not understood.
    # Never ask for more features than exist: RFE would keep all of them
    # anyway, and clamping avoids a needless warning on newer scikit-learn.
    n_keep = min(n_features_to_select, X.shape[1])
    feat_selector = RFE(estimator=model, step=1, n_features_to_select=n_keep)

    # `.values` instead of `.as_matrix()`, which was removed in pandas 1.0.
    # Only the fitted mask is needed, so `fit` replaces `fit_transform`.
    feat_selector.fit(X.values, y.values)

    # `support_` is a boolean mask aligned with the columns of X.
    selected_mask = np.asarray(feat_selector.support_)
    return np.array(X.columns)[selected_mask]
# Comment list
# Table of contents