def xgboost_feature_importance(model, train, return_df=False):
    """Plot, print and optionally return normalized feature importances.

    The raw scores come from ``model.get_fscore`` and are divided by their
    total, so the ``fscore`` column holds each feature's share of the summed
    score. The result is drawn as a horizontal seaborn bar plot on the
    current matplotlib figure and printed to stdout.

    Args:
        model: Object exposing ``get_fscore(fmap=...)`` returning a mapping
            of feature name to score (presumably an ``xgboost.Booster`` --
            confirm against the caller).
        train: Object whose ``columns`` attribute lists the feature names
            (presumably the training ``DataFrame``).
        return_df: When truthy, return the importance table.

    Returns:
        A ``DataFrame`` with columns ``feature`` and ``fscore`` sorted by
        ascending score when ``return_df`` is truthy, otherwise ``None``.
    """
    features = train.columns
    # create_feature_map is defined elsewhere; presumably it writes the
    # 'xgb.fmap' file that get_fscore reads below -- verify against it.
    create_feature_map(features)
    importance = model.get_fscore(fmap='xgb.fmap')
    # Sort (feature, score) pairs by score, lowest first.
    importance = sorted(importance.items(), key=operator.itemgetter(1))
    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()
    sns.barplot(x="fscore", y="feature", data=df)
    plt.title('Feature Importances')
    # Bars are horizontal, so the importance values run along the x axis;
    # labelling the y axis would overwrite the feature-name axis label.
    plt.xlabel('Relative Importance')
    # Function-call form works on both Python 2 and Python 3.
    print(df)
    if return_df:
        return df
    return None
评论列表
文章目录