def __call__(self, orig_img, input_size=None):
    """Run the detector on ``orig_img`` and return the NMS-filtered results.

    Args:
        orig_img: Image object exposing ``size`` (width, height) and
            ``resize``; it is used the way a PIL image would be.
        input_size: Optional size handed to ``orig_img.resize``. When
            ``None`` the image is resized with
            ``utils.reshape_to_yolo_size`` instead.

    Returns:
        The value of ``utils.nms(results, self.iou_thresh)``, where
        ``results`` is a list of dicts with the keys ``class_id``,
        ``label``, ``probs``, ``conf``, ``objectness`` and ``box``.
    """
    xp = self.model.xp
    orig_input_width, orig_input_height = orig_img.size
    if input_size is not None:
        img = orig_img.resize(input_size, Image.BILINEAR)
    else:
        img = utils.reshape_to_yolo_size(orig_img)

    # Divide by 255 and move the last axis to the front
    # (presumably HWC -> CHW for an 8-bit image; confirm against callers).
    img = np.asarray(img, dtype=np.float32) / 255.0
    img = img.transpose(2, 0, 1)

    # forward (a leading axis of length 1 is added to form the batch)
    x = xp.asarray(img[np.newaxis, :, :, :])
    x, y, w, h, conf, prob = self.model.predict(x)

    # parse results: drop the leading axes and bring everything to the CPU
    _, _, _, grid_h, grid_w = x.shape
    box_shape = (self.n_boxes, grid_h, grid_w)
    x, y, w, h, conf = (
        cuda.to_cpu(F.reshape(v, box_shape).data) for v in (x, y, w, h, conf)
    )
    # Class axis goes first so that ``conf * prob`` broadcasts over classes.
    prob = F.reshape(prob, (self.n_boxes, self.n_classes, grid_h, grid_w))
    prob = cuda.to_cpu(F.transpose(prob, (1, 0, 2, 3)).data)

    # Keep the (box, row, col) cells whose best class score passes the
    # detection threshold.
    detected_indices = (conf * prob).max(axis=0) > self.detection_thresh
    x = x[detected_indices]
    y = y[detected_indices]
    w = w[detected_indices]
    h = h[detected_indices]
    conf = conf[detected_indices]
    # Class axis moves last so that the mask selects one row per detection.
    prob = prob.transpose(1, 2, 3, 0)[detected_indices]

    results = []
    for box_x, box_y, box_w, box_h, box_conf, class_probs in zip(
            x, y, w, h, conf, prob):
        class_id = class_probs.argmax()
        # Box values are multiplied by the original image size
        # (presumably they are relative coordinates; confirm against
        # ``self.model.predict``).
        box = utils.Box(
            box_x * orig_input_width,
            box_y * orig_input_height,
            box_w * orig_input_width,
            box_h * orig_input_height,
        ).crop_region(orig_input_height, orig_input_width)
        results.append({
            'class_id': class_id,
            'label': self.labels[class_id],
            'probs': class_probs,
            'conf': box_conf,
            'objectness': box_conf * class_probs.max(),
            'box': box,
        })

    # nms
    nms_results = utils.nms(results, self.iou_thresh)
    return nms_results
评论列表
文章目录