def plot_feature_importances(columns, X_train, y_train, *,
                             n_estimators=10000, threshold=0.15):
    """Rank, plot and threshold features by random-forest importance.

    Fits a ``RandomForestClassifier`` on the training data, prints the
    features in descending order of importance, shows the same ranking as
    a bar chart, and finally prints the shape of ``X_train`` after keeping
    only the features that ``SelectFromModel`` retains at ``threshold``.

    Args:
        columns: Sequence of column labels (list, ndarray or pandas Index).
            The first entry is skipped -- presumably it is the target/label
            column; confirm against the caller. The remaining entries are
            used as feature names and are assumed to line up with the
            columns of ``X_train``.
        X_train: Training feature matrix; must expose ``.shape`` with a
            second dimension equal to the number of features.
        y_train: Training targets, passed straight to ``forest.fit``.
        n_estimators: Number of trees in the forest (default 10000, the
            value that used to be hard-coded).
        threshold: Importance cut-off handed to ``SelectFromModel``
            (default 0.15, the value that used to be hard-coded).

    Returns:
        None. All results are printed or plotted.
    """
    # Convert to an ndarray so the fancy indexing below also works when
    # `columns` is a plain list (indexing a list with an ndarray raises
    # TypeError).
    feat_labels = np.asarray(columns[1:])
    n_features = X_train.shape[1]

    forest = RandomForestClassifier(n_estimators=n_estimators, random_state=0)
    forest.fit(X_train, y_train)
    importances = forest.feature_importances_
    # Feature positions ordered from most to least important.
    indices = np.argsort(importances)[::-1]

    for rank, idx in enumerate(indices, start=1):
        print("%2d) %-*s %f" % (
            rank,
            30,
            feat_labels[idx],
            importances[idx],
        ))
    print()

    positions = range(n_features)
    plt.title('Feature Importances')
    plt.bar(
        positions,
        importances[indices],
        color='lightblue',
        align='center',
    )
    plt.xticks(positions, feat_labels[indices], rotation=90)
    plt.xlim([-1, n_features])
    plt.show()

    # prefit=True reuses the forest fitted above instead of refitting it.
    feature_selector = SelectFromModel(forest, threshold=threshold, prefit=True)
    X_selected = feature_selector.transform(X_train)
    print(X_selected.shape)
# Page-navigation residue from the scraped web page (not code):
# "Comment list" / "Table of contents".