import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from operator import itemgetter

def fea_plot(xg_model, feature, label, type='weight', max_num_features=None):
    fig, AX = plt.subplots(nrows=1, ncols=2)
    # Use the requested importance type consistently (the original hardcoded 'weight' here).
    xgb.plot_importance(xg_model, xlabel=type, importance_type=type, ax=AX[0], max_num_features=max_num_features)
    fscore = xg_model.get_score(importance_type=type)
    fscore = sorted(fscore.items(), key=itemgetter(1), reverse=True)  # sort by score, descending
    # get_fea_index is a project helper that maps score keys (e.g. 'f3') back to column indices.
    fea_index = get_fea_index(fscore, max_num_features)
    feature = feature[:, fea_index]
    dimension = len(fea_index)
    X = range(1, dimension + 1)
    Yp = np.mean(feature[np.where(label == 1)[0]], axis=0)  # per-feature mean over positive samples
    Yn = np.mean(feature[np.where(label != 1)[0]], axis=0)  # per-feature mean over negative samples
    for i in range(0, dimension):
        param = np.fmax(Yp[i], Yn[i])  # scale each feature pair so the larger mean becomes 1
        Yp[i] /= param
        Yn[i] /= param
    p1 = AX[1].bar(X, +Yp, facecolor='#ff9999', edgecolor='white')
    p2 = AX[1].bar(X, -Yn, facecolor='#9999ff', edgecolor='white')
    AX[1].legend((p1, p2), ('Malware', 'Normal'))
    AX[1].set_title('Comparison of selected features by their means')
    AX[1].set_xlabel('Feature Index')
    AX[1].set_ylabel('Mean Value')
    AX[1].set_ylim(-1.1, 1.1)
    plt.xticks(X, fea_index + 1, rotation=80)
    plt.suptitle('Feature Selection results')
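A minimal usage sketch for fea_plot on random data (the booster, data arrays, and variable names below are hypothetical, and get_fea_index must be defined as noted above):

# Hypothetical smoke test for fea_plot.
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb

X_demo = np.random.rand(200, 10)            # 200 samples, 10 features
y_demo = np.random.randint(0, 2, size=200)  # binary labels: 1 = malware, 0 = normal
booster = xgb.train({'objective': 'binary:logistic'},
                    xgb.DMatrix(X_demo, label=y_demo), num_boost_round=20)
fea_plot(booster, X_demo, y_demo, type='gain', max_num_features=5)
plt.show()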
def run_train_validation(self):
    x_train, y_train, x_validation, y_validation = self.get_train_validationset()
    dtrain = xgb.DMatrix(x_train, label=y_train, feature_names=x_train.columns)
    dvalidation = xgb.DMatrix(x_validation, label=y_validation, feature_names=x_validation.columns)
    self.set_xgb_parameters()

    evals = [(dtrain, 'train'), (dvalidation, 'eval')]  # watchlist for per-round evaluation output
    model = xgb.train(self.xgb_params, dtrain, evals=evals, **self.xgb_learning_params)

    xgb.plot_importance(model)
    plt.show()
    print("features used:\n {}".format(self.get_used_features()))
    return
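The helper methods (get_train_validationset, set_xgb_parameters, get_used_features) are not part of this snippet. A plausible sketch of set_xgb_parameters, with illustrative values only, might look like:

def set_xgb_parameters(self):
    # Illustrative values only; the original project's settings are not shown.
    self.xgb_params = {
        'objective': 'binary:logistic',
        'eta': 0.1,
        'max_depth': 6,
        'eval_metric': 'logloss',
    }
    # Keyword arguments forwarded to xgb.train() above.
    self.xgb_learning_params = {
        'num_boost_round': 200,
        'early_stopping_rounds': 20,
        'verbose_eval': 50,
    }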
def plot_importance(model, importance_type='weight'):
    """
    How the importance is calculated: either "weight", "gain", or "cover".
    "weight" is the number of times a feature appears in a tree.
    "gain" is the average gain of splits which use the feature.
    "cover" is the average coverage of splits which use the feature,
    where coverage is defined as the number of samples affected by the split.
    """
    # The original referenced a module-level `model`; taking it as a parameter
    # makes the function self-contained.
    xgb.plot_importance(model, importance_type=importance_type,
                        max_num_features=40)
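The same three importance metrics can be read numerically from the booster via get_score; a short sketch assuming a trained Booster named model:

for imp_type in ('weight', 'gain', 'cover'):
    scores = model.get_score(importance_type=imp_type)
    top5 = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:5]
    print(imp_type, top5)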
def plot_importance(self):
    ax = xgb.plot_importance(self.model)
    self.save_topn_features()
    return ax
def save_topn_features(self, fname="XGBRegressor_topn_features.txt", topn=-1):
ax = xgb.plot_importance(self.model)
yticklabels = ax.get_yticklabels()[::-1]
if topn == -1:
topn = len(yticklabels)
else:
topn = min(topn, len(yticklabels))
with open(fname, "w") as f:
for i in range(topn):
f.write("%s\n"%yticklabels[i].get_text())
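Reading the saved ranking back in is straightforward; a sketch assuming the default file name used above:

with open("XGBRegressor_topn_features.txt") as f:
    topn_features = [line.strip() for line in f]
print(topn_features)  # feature names, most important first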
def plot_importance(self):
    ax = xgb.plot_importance(self.model)
    self.save_topn_features()
    return ax
def save_topn_features(self, fname="XGBClassifier_topn_features.txt", topn=10):
ax = xgb.plot_importance(self.model)
yticklabels = ax.get_yticklabels()[::-1]
if topn == -1:
topn = len(yticklabels)
else:
topn = min(topn, len(yticklabels))
with open(fname, "w") as f:
for i in range(topn):
f.write("%s\n"%yticklabels[i].get_text())