def recommend(self, ids: Sequence[int],
              predictions: numpy.ndarray,
              n: int=1, diversity: float=0.5) -> Sequence[int]:
    """Recommends instances to label by margin-based uncertainty sampling.

    Each instance is scored by how close its two largest predicted values
    are to each other; the scores are then handed to ``choose_boltzmann``,
    which selects the indices of the instances to recommend.

    Notes
    -----
    Predictions are treated as per-class probabilities along the last axis,
    so at least two classes (C >= 2) are required to form a margin.

    Parameters
    ----------
    ids
        Sequence of IDs in the unlabelled data pool.
    predictions
        N x 1 x C array of predictions. The ith row must correspond with the
        ith ID in the sequence.
    n
        Number of recommendations to make.
    diversity
        Recommendation diversity in [0, 1]. Forwarded to
        ``choose_boltzmann`` as ``temperature=diversity * 2``.

    Returns
    -------
    Sequence[int]
        IDs of the instances to label.

    Raises
    ------
    ValueError
        If ``predictions`` is not 3-dimensional, does not have exactly one
        predictor, has fewer than two classes, or its first dimension does
        not match ``len(ids)``.
    """
    # Validate with real exceptions rather than ``assert`` so the checks
    # still run when Python is started with -O.
    if predictions.ndim != 3:
        raise ValueError(
            'Predictions must be an N x 1 x C array, got {} '
            'dimension(s)'.format(predictions.ndim))

    if predictions.shape[1] != 1:
        raise ValueError('Uncertainty sampling must have one predictor')

    n_instances, _, n_classes = predictions.shape

    if len(ids) != n_instances:
        raise ValueError(
            'Number of IDs ({}) must match number of predictions '
            '({})'.format(len(ids), n_instances))

    if n_classes < 2:
        raise ValueError(
            'Margin sampling needs at least two classes, got '
            '{}'.format(n_classes))

    # x* = argmin p(y1^ | x) - p(y2^ | x) where yn^ = argmax p(yn | x)
    # (Settles 2009).
    # Partitioning at index -2 places the second-largest value at [-2] and
    # the largest at [-1] without fully sorting the class axis.
    partitioned = numpy.partition(predictions, -2, axis=2)
    most_likely = partitioned[:, 0, -1]
    second_most_likely = partitioned[:, 0, -2]

    # A small margin between the top two classes yields a high score.
    scores = 1 - (most_likely - second_most_likely)

    indices = choose_boltzmann(self._db.read_features(ids), scores, n,
                               temperature=diversity * 2)
    return [ids[i] for i in indices]
# For safe string-based access to recommender classes.
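# NOTE(review): stray web-page navigation text ("Comment list", "Table of contents") was captured here; it is not part of the module.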