# yolov2_predict_caltech.py -- Python source code snippet

def __call__(self, orig_img, input_size=None):
    """Run the detector on one image and return the detections kept by NMS.

    Args:
        orig_img: Image object exposing ``size`` as ``(width, height)`` and
            a ``resize`` method (it is used like a PIL image).
        input_size: Optional size handed to ``orig_img.resize``. When
            ``None``, the image is resized by ``utils.reshape_to_yolo_size``.

    Returns:
        The value returned by ``utils.nms`` for the candidate detections.
        Each candidate is a dict with the keys ``'class_id'``, ``'label'``,
        ``'probs'``, ``'conf'``, ``'objectness'`` and ``'box'``.
    """
    xp = self.model.xp
    orig_input_width, orig_input_height = orig_img.size
    if input_size is not None:
        img = orig_img.resize(input_size, Image.BILINEAR)
    else:
        img = utils.reshape_to_yolo_size(orig_img)

    # Convert to float32, divide by 255 and move the last axis to the
    # front (assumes an H x W x C image array -- TODO confirm).
    img = np.asarray(img, dtype=np.float32) / 255.0
    img = img.transpose(2, 0, 1)

    # forward (a leading axis of length 1 is added for the batch)
    x = xp.asarray(img[np.newaxis, :, :, :])
    x, y, w, h, conf, prob = self.model.predict(x)

    # parse results
    _, _, _, grid_h, grid_w = x.shape
    box_shape = (self.n_boxes, grid_h, grid_w)

    # Drop the extra axes and bring every per-box output to host memory.
    x, y, w, h, conf = [
        cuda.to_cpu(F.reshape(out, box_shape).data)
        for out in (x, y, w, h, conf)
    ]
    # Class scores are laid out as (n_classes, n_boxes, grid_h, grid_w) so
    # that they broadcast against ``conf`` below.
    prob = F.reshape(prob, (self.n_boxes, self.n_classes, grid_h, grid_w))
    prob = cuda.to_cpu(F.transpose(prob, (1, 0, 2, 3)).data)

    # Keep the boxes whose best (conf * class score) exceeds the threshold.
    detected_indices = (conf * prob).max(axis=0) > self.detection_thresh
    x = x[detected_indices]
    y = y[detected_indices]
    w = w[detected_indices]
    h = h[detected_indices]
    conf = conf[detected_indices]
    # Move the class axis last so the mask selects one score row per box.
    prob = prob.transpose(1, 2, 3, 0)[detected_indices]

    results = []
    for box_x, box_y, box_w, box_h, box_conf, box_probs in zip(
            x, y, w, h, conf, prob):
        class_id = box_probs.argmax()
        results.append({
            'class_id': class_id,
            'label': self.labels[class_id],
            'probs': box_probs,
            'conf': box_conf,
            'objectness': box_conf * box_probs.max(),
            # Box values are multiplied by the original image size.
            'box': utils.Box(
                box_x * orig_input_width,
                box_y * orig_input_height,
                box_w * orig_input_width,
                box_h * orig_input_height,
            ).crop_region(orig_input_height, orig_input_width),
        })

    # nms
    # Candidate count before NMS; the parenthesised form is valid on both
    # Python 2 and Python 3.
    print(len(results))
    nms_results = utils.nms(results, self.iou_thresh)
    return nms_results