def response_surface_analysis(df, classifiers=None, divisor=255):
    """Tabulate and plot a per-feature accuracy effect for each classifier.

    For every (classifier, feature) pair the effect is computed as::

        (sum of test_accuracy where feature == 1
         - sum of test_accuracy where feature == 0) / divisor
        + mean test_accuracy of that classifier

    The resulting table is ordered by feature (highest mean effect first)
    and then by classifier (highest mean test accuracy first), printed, and
    drawn as a seaborn strip plot with one row per feature and one hue per
    classifier.

    Relies on the module-level names ``pd`` (pandas) and ``sns`` (seaborn).

    Args:
        df: DataFrame with a 'classifier' column, a 'test_accuracy' column
            and any number of further columns. Every further column is
            treated as a feature and compared against 1 and 0.
        classifiers: Classifier names to analyse, matched against
            ``df['classifier']``. Defaults to the six names that were
            previously hard-coded.
        divisor: Value the with/without accuracy-sum difference is divided
            by. Defaults to 255, the previously hard-coded constant
            (presumably the number of runs per classifier -- TODO confirm).

    Returns:
        None. The table is printed and the plot is shown.
    """
    # ``sns.plt`` was only ever an alias for this module and is gone in
    # seaborn >= 0.8, so pyplot is imported directly.
    import matplotlib.pyplot as plt

    if classifiers is None:
        classifiers = ['Decision Tree', 'Linear SVC', 'Logistic Regression',
                       'Multinomial NB', 'Random Forest', 'Voting Classifier']

    features = [column for column in df.columns
                if column not in ('classifier', 'test_accuracy')]

    def tally_results():
        """Yield (classifier, feature, effect) for every pair."""
        for classifier in classifiers:
            # Filter once per classifier instead of once per feature.
            class_rows = df[df['classifier'] == classifier]
            accuracy = class_rows['test_accuracy']
            class_mean = accuracy.mean()
            for feature in features:
                with_sum = accuracy[class_rows[feature] == 1].sum()
                without_sum = accuracy[class_rows[feature] == 0].sum()
                effect = (with_sum - without_sum) / divisor + class_mean
                yield classifier, feature, effect

    results = pd.DataFrame(list(tally_results()),
                           columns=['classifier', 'feature', 'effect'])

    # Categorical dtypes make sort_values follow these custom orders rather
    # than alphabetical order.
    class_order = list(df.groupby('classifier')['test_accuracy']
                         .mean()
                         .sort_values(ascending=False)
                         .index)
    results['classifier'] = pd.Categorical(results['classifier'],
                                           categories=class_order)

    feat_order = list(results.groupby('feature')['effect']
                             .mean()
                             .sort_values(ascending=False)
                             .index)
    results['feature'] = pd.Categorical(results['feature'],
                                        categories=feat_order)

    results = results.sort_values(['feature', 'classifier'])
    print(results)

    # x/y are passed by keyword: newer seaborn rejects positional data
    # variables, and older releases accept the keywords as well.
    sns.stripplot(x='effect', y='feature', hue='classifier', data=results)
    plt.legend()
    plt.show()
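# Comment list (web-page navigation text left over from the scraped article)
# Table of contents (web-page navigation text left over from the scraped article)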