def advanced_scoring_classifiers(probas, actuals, name=None):
    """Print a scoring report for a binary classifier's probability predictions.

    The report, written to stdout, contains:
      * the Brier score loss of the probabilities against the actuals,
      * the accuracy of the labels obtained by thresholding at 0.5,
      * a confusion matrix (pandas crosstab with margins),
      * for each 10%-wide bucket of predicted probability that has at least
        one prediction, the observed rate of the actuals in that bucket.

    Args:
        probas: Iterable of predictions. Either "flat" (one positive-class
            probability per item) or "nested" (one indexable per item, in
            which case element ``[1]`` is taken as the positive-class
            probability).
        actuals: Iterable of true labels, in the same order as ``probas``.
            The per-bucket rate is computed with ``sum``, so it assumes
            numeric 0/1 labels.
        name: Optional label printed before the Brier score.

    Returns:
        None. All output goes to stdout.
    """
    # pandas Series don't play nice here. Make sure both inputs are plain
    # lists; this also lets us iterate the predictions more than once even
    # when the caller hands us a one-shot iterable.
    actuals = list(actuals)
    predictions = list(probas)

    print('Here is our brier-score-loss, which is the default value we optimized for while training, and is the value returned from .score() unless you requested a custom scoring metric')
    print('It is a measure of how close the PROBABILITY predictions are.')
    if name is not None:
        print(name)

    # Sometimes we will be given "flattened" probabilities (only the
    # probability of our positive label), while other times we might be given
    # "nested" probabilities (probabilities of both positive and negative, in
    # a list, for each item).
    try:
        positive_probas = [proba[1] for proba in predictions]
    except (TypeError, IndexError):
        # Indexing a plain float raises TypeError and indexing a numpy scalar
        # raises IndexError: both mean the input was already flat.
        positive_probas = predictions

    print(format(brier_score_loss(actuals, positive_probas), '.4f'))

    print('\nHere is the trained estimator\'s overall accuracy (when it predicts a label, how frequently is that the correct label?)')
    # A probability of exactly 0.5 counts as a positive prediction.
    predicted_labels = [1 if proba >= 0.5 else 0 for proba in positive_probas]
    print(format(accuracy_score(y_true=actuals, y_pred=predicted_labels) * 100, '.1f') + '%')

    print('\nHere is a confusion matrix showing predictions and actuals by label')
    # it would make sense to use sklearn's confusion_matrix here but it apparently has no labels
    # took this idea instead from: http://stats.stackexchange.com/a/109015
    conf = pd.crosstab(pd.Series(actuals), pd.Series(predicted_labels), rownames=['v Actual v'], colnames=['Predicted >'], margins=True)
    print(conf)

    print('Here is the accuracy of our trained estimator at each level of predicted probabilities')
    # One bucket per 10 percentage points: 0, 10, ..., 100. Insertion order
    # is kept so the report is printed from lowest to highest bucket.
    summary_dict = OrderedDict()
    for num in range(0, 110, 10):
        summary_dict[num] = []

    for proba, actual_label in zip(positive_probas, actuals):
        # Truncate to a whole percentage, then round down to the nearest ten.
        bucket = int(proba * 100) // 10 * 10
        summary_dict[bucket].append(actual_label)

    for bucket, bucket_actuals in summary_dict.items():
        if bucket_actuals:
            print('Predicted probability: ' + str(bucket) + '%')
            actual = sum(bucket_actuals) * 1.0 / len(bucket_actuals)

            # Format into a prettier number
            actual = round(actual * 100, 0)
            print('Actual: ' + str(actual) + '%')
            print('# preds: ' + str(len(bucket_actuals)) + '\n')

    print('\n\n')
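# NOTE: web-page navigation labels ("Comment list", "Article contents") were
# captured here along with the snippet; they are not part of the program.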