def show_feature_importance(gbdt, feature_names=None):
    """Print, plot and save the relative feature importance of a model.

    The raw scores come from ``gbdt.get_fscore(fmap='xgb.fmap')`` and are
    normalised so that they sum to 1. The resulting table is printed and
    drawn as a horizontal bar chart, which is written to
    ``feature_importance_xgb.png`` in the current working directory.

    Args:
        gbdt: Object exposing ``get_fscore(fmap=...)`` that returns a mapping
            of feature name to score (presumably an ``xgboost.Booster`` --
            confirm against the caller).
        feature_names: Optional iterable of every candidate feature name.
            When given, the names that do not appear in the score mapping
            are printed as unused features.

    Returns:
        None. The function only prints and writes the PNG file.
    """
    fscores = gbdt.get_fscore(fmap='xgb.fmap')
    # Ascending sort so the most important feature ends up at the top of
    # the horizontal bar chart.
    ranked = sorted(fscores.items(), key=operator.itemgetter(1))
    df = pd.DataFrame(ranked, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("feature importance " + str(df))

    if feature_names is not None:
        # `in` on a pandas Series tests the index labels, not the values,
        # so membership must be checked against a set of the column values.
        used_features = set(df['feature'])
        unused_features = [f for f in feature_names if f not in used_features]
        print("[IDF]Unused features: " + str(unused_features))

    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current figure"; no stray extra figures are created.
    ax = df.plot(kind='barh', x='feature', y='fscore', legend=False,
                 figsize=(6, 10))
    ax.set_title('XGBoost Feature Importance')
    ax.set_xlabel('relative importance')
    ax.get_figure().savefig('feature_importance_xgb.png')
评论列表
文章目录