def test_probabilities(model: ClassifierMixin, X: np.array, y: pd.Series,
                       bins: int = 10, threshold: float = 0.5):
    """Print label counts and an accuracy figure per probability bin.

    The probability taken from column 1 of ``model.predict_proba(X)`` is
    split into ``bins`` equal-width intervals over [0, 1]. For each interval,
    highest first, one line is printed with the interval's lower edge, the
    number of 1-labels, the number of 0-labels and an accuracy figure.

    Args:
        model: Classifier exposing ``predict_proba``.
        X: Samples passed to ``model.predict_proba``.
        y: True labels, one per sample; assumed to be 0/1 so that the
            per-bin sum equals the number of positives.
        bins: Number of equal-width probability intervals.
        threshold: Intervals whose lower edge is below this value are scored
            as predicting class 0, all others as predicting class 1.

    Raises:
        ValueError: If ``bins`` is smaller than 1.
    """
    if bins < 1:
        raise ValueError('bins must be a positive integer')

    probs = [p[1] for p in model.predict_proba(X)]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})

    # Explicit edges over [0, 1]: an integer ``bins`` would make pd.cut split
    # the observed min..max range, so the printed labels would not match the
    # real intervals.
    edges = np.linspace(0.0, 1.0, bins + 1)
    # Label each bin by its lower edge, with enough decimals to stay unique
    # for any bin count (bins=10 still yields 0.0, 0.1, ..., 0.9).
    decimals = len(str(bins)) + 1
    cut_labels = [round(float(edge), decimals) for edge in edges[:-1]]
    binned = pd.cut(df['prob'], edges, labels=cut_labels, include_lowest=True)
    # observed=False keeps empty bins in the table regardless of the
    # installed pandas default.
    by_prob = (df.groupby(binned, observed=False)['label']
               .agg(['sum', 'count']))

    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        # Guard kept from the original: an empty bin's sum may come back NaN.
        ones = 0 if math.isnan(ones) else int(ones)
        count = int(row['count'])
        zeros = count - ones
        if count > 0:
            # Below the threshold the bin counts as "predicted 0", so the
            # zeros are the correct ones; otherwise the ones are.
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
# (web-page residue, not code) Comment list
# (web-page residue, not code) Table of contents