def MAUC(data, num_classes):
"""
Calculates the MAUC over a set of multi-class probabilities and
their labels. This is equation 7 in Hand and Till's 2001 paper.
NB: The class labels should be in the set [0,n-1] where n = # of classes.
The class probability should be at the index of its label in the
probability list.
I.e. With 3 classes the labels should be 0, 1, 2. The class probability
for class '1' will be found in index 1 in the class probability list
wrapped inside the zipped list with the labels.
Args:
data (list): A zipped list (NOT A GENERATOR) of the labels and the
class probabilities in the form (m = # data instances):
[(label1, [p(x1c1), p(x1c2), ... p(x1cn)]),
(label2, [p(x2c1), p(x2c2), ... p(x2cn)])
...
(labelm, [p(xmc1), p(xmc2), ... (pxmcn)])
]
num_classes (int): The number of classes in the dataset.
Returns:
The MAUC as a floating point value.
"""
# Find all pairwise comparisons of labels
class_pairs = [x for x in itertools.combinations(range(num_classes), 2)]
# Have to take average of A value with both classes acting as label 0 as this
# gives different outputs for more than 2 classes
sum_avals = 0
for pairing in class_pairs:
sum_avals += (a_value(data, zero_label=pairing[0], one_label=pairing[1]) +
a_value(data, zero_label=pairing[1], one_label=pairing[0])) / 2.0
return sum_avals * (2 / float(num_classes * (num_classes-1))) # Eqn 7
评论列表
文章目录