def fea_plot(xg_model, feature, label, type='weight', max_num_features=None):
    """Plot feature importances beside the per-class means of the top features.

    A figure with two side-by-side panels is created:

    * left  -- ``xgb.plot_importance`` for ``xg_model`` using the importance
      metric named by ``type``;
    * right -- for every selected feature, the mean over samples with
      ``label == 1`` (drawn upwards, legend 'Malware') against the mean over
      all other samples (drawn downwards, legend 'Normal'), each pair scaled
      by the larger of the two means.

    Args:
        xg_model: Model object exposing ``get_score(importance_type=...)``
            and accepted by ``xgb.plot_importance``.
        feature: 2-D array indexed as ``feature[sample, feature_column]``.
        label: Array of per-sample labels; ``1`` marks the positive class.
        type: Importance metric name, forwarded to both ``get_score`` and
            ``plot_importance`` and used as the left panel's x-axis label.
        max_num_features: Forwarded to ``plot_importance`` and to
            ``get_fea_index`` to limit how many features are shown.

    Returns:
        None. The figure is left as the current matplotlib figure; the
        caller is responsible for showing or saving it.
    """
    fig, AX = plt.subplots(nrows=1, ncols=2)

    # Left panel: use the SAME importance metric as the axis label and the
    # ranking below (previously this was hard-coded to 'weight').
    xgb.plot_importance(
        xg_model,
        xlabel=type,
        importance_type=type,
        ax=AX[0],
        max_num_features=max_num_features,
    )

    # Rank features by score, highest first.
    fscore = xg_model.get_score(importance_type=type)
    fscore = sorted(fscore.items(), key=itemgetter(1), reverse=True)

    # NOTE(review): get_fea_index is defined elsewhere; it is assumed to
    # return zero-based column indices into `feature` -- confirm.
    fea_index = get_fea_index(fscore, max_num_features)
    feature = feature[:, fea_index]
    dimension = len(fea_index)
    X = np.arange(1, dimension + 1)

    # Per-class column means of the selected features.
    Yp = np.mean(feature[np.where(label == 1)[0]], axis=0)
    Yn = np.mean(feature[np.where(label != 1)[0]], axis=0)

    # Scale each pair by its larger mean so the bars fit the fixed y-limits.
    # A zero scale would yield NaN/inf bars, so leave such pairs unscaled.
    scale = np.fmax(Yp, Yn)
    scale = np.where(scale == 0, 1.0, scale)
    Yp = Yp / scale
    Yn = Yn / scale

    # Right panel: mirrored bars, positive class up and the rest down.
    p1 = AX[1].bar(X, +Yp, facecolor='#ff9999', edgecolor='white')
    p2 = AX[1].bar(X, -Yn, facecolor='#9999ff', edgecolor='white')
    AX[1].legend((p1, p2), ('Malware', 'Normal'))
    AX[1].set_title('Comparison of selected features by their means')
    AX[1].set_xlabel('Feature Index')
    AX[1].set_ylabel('Mean Value')
    AX[1].set_ylim(-1.1, 1.1)

    # Label ticks with one-based feature indices. Target the right-hand axes
    # explicitly instead of relying on pyplot's implicit current axes, and
    # coerce to an array so a plain list of indices also works.
    AX[1].set_xticks(X)
    AX[1].set_xticklabels(np.asarray(fea_index) + 1, rotation=80)

    fig.suptitle('Feature Selection results')
评论列表
文章目录