def test_partition_cdtype(self):
d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
('Lancelot', 1.9, 38)],
dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
tgt = np.sort(d, order=['age', 'height'])
assert_array_equal(np.partition(d, range(d.size),
order=['age', 'height']),
tgt)
assert_array_equal(d[np.argpartition(d, range(d.size),
order=['age', 'height'])],
tgt)
for k in range(d.size):
assert_equal(np.partition(d, k, order=['age', 'height'])[k],
tgt[k])
assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
tgt[k])
d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
tgt = np.sort(d)
assert_array_equal(np.partition(d, range(d.size)), tgt)
for k in range(d.size):
assert_equal(np.partition(d, k)[k], tgt[k])
assert_equal(d[np.argpartition(d, k)][k], tgt[k])
python类argpartition()的实例源码
def GetFeatures(self, data):
closestPrototypesIndxs = []
D = self.layers[0] - (np.array(data)*self.stateScale + self.bias)
D = np.sqrt(sum(D.T**2)) # a bottlenect for sure
indexes = np.argpartition(D, self.c[0], axis=0)[:self.c[0]]
for i in range(1,len(self.layers)):
D = np.sum(np.setxor1d(self.layers[i], indexes, True), axis=1)
# phi = np.zeros(self.prototypeList[i])
# phi[indexes] = 1
# D = np.sum(np.logical_xor(self.layers[i], phi), axis=1)
indexes = np.argpartition(D, self.c[i], axis=0)[:self.c[i]]
return indexes
def process_frame_for_game_play(frame):
"""Assumes a grayscale frame"""
histogram = skimage.exposure.histogram(frame[40:])
if np.unique(histogram[0]).size < 3:
return None
max_indices = np.argpartition(histogram[0], -3)[-3:]
for index in sorted(max_indices)[:2]:
frame[frame == index] = 0
threshold = skimage.filters.threshold_otsu(frame[40:])
bw_frame = frame > threshold
return bw_frame
def test_partition_cdtype(self):
d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
('Lancelot', 1.9, 38)],
dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
tgt = np.sort(d, order=['age', 'height'])
assert_array_equal(np.partition(d, range(d.size),
order=['age', 'height']),
tgt)
assert_array_equal(d[np.argpartition(d, range(d.size),
order=['age', 'height'])],
tgt)
for k in range(d.size):
assert_equal(np.partition(d, k, order=['age', 'height'])[k],
tgt[k])
assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
tgt[k])
d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
tgt = np.sort(d)
assert_array_equal(np.partition(d, range(d.size)), tgt)
for k in range(d.size):
assert_equal(np.partition(d, k)[k], tgt[k])
assert_equal(d[np.argpartition(d, k)][k], tgt[k])
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
(prediction, class) format"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
(prediction, class) format"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
# print("Type - Test :")
# print(type(video_ids[video_index]))
# print(video_ids[video_index].decode('utf-8'))
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
(prediction, class) format"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
for pair in line) + "\n"
def __call__(self, words, weights, vocabulary_max):
if len(words) < vocabulary_max * self.trigger_ratio:
return words, weights
if not isinstance(words, numpy.ndarray):
words = numpy.array(words)
# Tail optimization does not help with very large vocabularies
if len(words) > vocabulary_max * 2:
indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
indices = indices[-vocabulary_max:]
words = words[indices]
weights = weights[indices]
return words, weights
# Vocabulary typically consists of these three parts:
# 1) the core - we found it's border - `core_end` - 15%
# 2) the body - 70%
# 3) the minor tail - 15%
# (1) and (3) are roughly the same size
# (3) can be safely discarded, (2) can be discarded with care,
# (1) shall never be discarded.
sorter = numpy.argsort(weights)[::-1]
weights = weights[sorter]
trend_start = int(len(weights) * 0.2)
trend_finish = int(len(weights) * 0.8)
z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
numpy.log(weights[trend_start:trend_finish]),
1)
exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
avg_error = numpy.abs(weights[trend_start:trend_finish] -
exp_z[trend_start:trend_finish]).mean()
tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
weights = weights[:-tail_size][:vocabulary_max]
words = words[sorter[:-tail_size]][:vocabulary_max]
return words, weights