# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.

Annotate each detected result as true positive or false positive according to
a predefined IOU ratio. Non Maximum Suppression is used by default. Multi class
detection is supported by default.
"""
import numpy as np

from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops


class PerImageEvaluation(object):
  """Evaluate detection result of a single image."""

  def __init__(self,
               num_groundtruth_classes,
               matching_iou_threshold=0.5,
               nms_iou_threshold=0.3,
               nms_max_output_boxes=50):
    """Initializes PerImageEvaluation with the evaluation parameters.

    Args:
      num_groundtruth_classes: Number of ground truth object classes. Class
          labels 0 .. num_groundtruth_classes - 1 are evaluated.
      matching_iou_threshold: A ratio of area intersection to union, which is
          the threshold to consider whether a detection is true positive or not
      nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
      nms_max_output_boxes: Number of maximum output boxes in NMS.
    """
    self.matching_iou_threshold = matching_iou_threshold
    self.nms_iou_threshold = nms_iou_threshold
    self.nms_max_output_boxes = nms_max_output_boxes
    self.num_groundtruth_classes = num_groundtruth_classes

  def compute_object_detection_metrics(self, detected_boxes, detected_scores,
                                       detected_class_labels,
                                       groundtruth_boxes,
                                       groundtruth_class_labels,
                                       groundtruth_is_difficult_lists):
    """Compute Object Detection related metrics from a single image.

    Detections whose box is degenerate (y_min >= y_max or x_min >= x_max) are
    dropped before any metric is computed.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
          regions of detected object regions.
          Each row is of the format [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of length N, representing
          the confidence scores of the detected N object instances.
      detected_class_labels: An integer 1-d numpy array of length N,
          representing the class labels of the detected N object instances.
          It must be 1-d because `labels == c` is used as a row mask on
          detected_boxes.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
          regions of object instances in ground truth
      groundtruth_class_labels: An integer 1-d numpy array of length M,
          representing M class labels of object instances in ground truth
          (1-d for the same row-mask reason as detected_class_labels).
      groundtruth_is_difficult_lists: A boolean numpy array of length M
          denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A list of C float numpy arrays, one per class c. Each array
          holds the K scores of the detections of class c that were kept for
          scoring.
      tp_fp_labels: A list of C boolean 1-d numpy arrays. Each array has
          length K and holds the True/False positive label of the kept
          detections with class label c.
      is_class_correctly_detected_in_image: a numpy integer array of
          shape [C], indicating whether the corresponding class has at least
          one instance being correctly detected in the image
    """
    detected_boxes, detected_scores, detected_class_labels = (
        self._remove_invalid_boxes(detected_boxes, detected_scores,
                                   detected_class_labels))
    scores, tp_fp_labels = self._compute_tp_fp(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_is_difficult_lists)
    is_class_correctly_detected_in_image = self._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    return scores, tp_fp_labels, is_class_correctly_detected_in_image

  def _compute_cor_loc(self, detected_boxes, detected_scores,
                       detected_class_labels, groundtruth_boxes,
                       groundtruth_class_labels):
    """Compute CorLoc score for object detection result.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
          regions of detected object regions.
          Each row is of the format [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of length N, representing
          the confidence scores of the detected N object instances.
      detected_class_labels: An integer 1-d numpy array of length N,
          representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
          regions of object instances in ground truth
      groundtruth_class_labels: An integer 1-d numpy array of length M,
          representing M class labels of object instances in ground truth

    Returns:
      is_class_correctly_detected_in_image: a numpy integer array of
          shape [C], indicating whether the corresponding class has at least
          one instance being correctly detected in the image
    """
    is_class_correctly_detected_in_image = np.zeros(
        self.num_groundtruth_classes, dtype=int)
    for i in range(self.num_groundtruth_classes):
      # Build each per-class row mask once and reuse it for every array that
      # is aligned with the same labels.
      gt_mask = groundtruth_class_labels == i
      detection_mask = detected_class_labels == i
      gt_boxes_at_ith_class = groundtruth_boxes[gt_mask, :]
      detected_boxes_at_ith_class = detected_boxes[detection_mask, :]
      detected_scores_at_ith_class = detected_scores[detection_mask]
      is_class_correctly_detected_in_image[i] = (
          self._compute_is_aclass_correctly_detected_in_image(
              detected_boxes_at_ith_class, detected_scores_at_ith_class,
              gt_boxes_at_ith_class))
    return is_class_correctly_detected_in_image

  def _compute_is_aclass_correctly_detected_in_image(
      self, detected_boxes, detected_scores, groundtruth_boxes):
    """Compute CorLoc score for a single class.

    Only the single highest-scoring detection is considered: the class counts
    as correctly detected when that detection overlaps some ground truth box
    with IOU >= matching_iou_threshold.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
          coordinates
      detected_scores: A 1-d numpy array of length N representing
          classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
          truth box coordinates

    Returns:
      is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether
          a class is correctly detected in the image or not
    """
    # Without detections or without ground truth the class cannot be matched.
    if detected_boxes.size == 0 or groundtruth_boxes.size == 0:
      return 0

    max_score_id = np.argmax(detected_scores)
    # BoxList is built from a 2-d array, so keep the top box as shape [1, 4].
    detected_boxlist = np_box_list.BoxList(
        np.expand_dims(detected_boxes[max_score_id, :], axis=0))
    gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
    iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
    if np.max(iou) >= self.matching_iou_threshold:
      return 1
    return 0

  def _compute_tp_fp(self, detected_boxes, detected_scores,
                     detected_class_labels, groundtruth_boxes,
                     groundtruth_class_labels, groundtruth_is_difficult_lists):
    """Labels true/false positives of detections of an image across all classes.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
          regions of detected object regions.
          Each row is of the format [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of length N, representing
          the confidence scores of the detected N object instances.
      detected_class_labels: An integer 1-d numpy array of length N,
          representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
          regions of object instances in ground truth
      groundtruth_class_labels: An integer 1-d numpy array of length M,
          representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M
          denoting whether a ground truth box is a difficult instance or not

    Returns:
      result_scores: A list of C float numpy arrays, one per class c, holding
          the K scores of the kept detections with class label c.
      result_tp_fp_labels: A list of C boolean 1-d numpy arrays of length K,
          holding the True/False positive label of the kept detections with
          class label c.
    """
    result_scores = []
    result_tp_fp_labels = []
    for i in range(self.num_groundtruth_classes):
      # Build each per-class row mask once and reuse it for every array that
      # is aligned with the same labels.
      gt_mask = groundtruth_class_labels == i
      detection_mask = detected_class_labels == i
      gt_boxes_at_ith_class = groundtruth_boxes[gt_mask, :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[gt_mask])
      detected_boxes_at_ith_class = detected_boxes[detection_mask, :]
      detected_scores_at_ith_class = detected_scores[detection_mask]
      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
          detected_boxes_at_ith_class, detected_scores_at_ith_class,
          gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class)
      result_scores.append(scores)
      result_tp_fp_labels.append(tp_fp_labels)
    return result_scores, result_tp_fp_labels

  def _remove_invalid_boxes(self, detected_boxes, detected_scores,
                            detected_class_labels):
    """Drops detections whose box has non-positive height or width.

    A box is kept only when y_min < y_max and x_min < x_max, given the
    [y_min, x_min, y_max, x_max] row format.

    Args:
      detected_boxes: A float numpy array of shape [N, 4] with rows of the
          format [y_min, x_min, y_max, x_max]
      detected_scores: A numpy array whose first dimension has length N
      detected_class_labels: A numpy array whose first dimension has length N

    Returns:
      A tuple (detected_boxes, detected_scores, detected_class_labels)
      restricted to the rows of the valid boxes.
    """
    valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
                                   detected_boxes[:, 1] < detected_boxes[:, 3])
    return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
            detected_class_labels[valid_indices])

  def _compute_tp_fp_for_single_class(self, detected_boxes, detected_scores,
                                      groundtruth_boxes,
                                      groundtruth_is_difficult_list):
    """Labels boxes detected with the same class from the same image as tp/fp.

    Detections first go through non-max suppression. Each surviving detection
    is then compared with the ground truth box it overlaps most:
      * IOU below matching_iou_threshold: false positive.
      * The box is difficult: the detection is removed from the output.
      * The box is not difficult and not yet claimed: true positive, and the
        box is marked as claimed.
      * The box is not difficult but already claimed: false positive.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
          coordinates
      detected_scores: A 1-d numpy array of length N representing
          classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
          truth box coordinates
      groundtruth_is_difficult_list: A boolean numpy array of length M
          denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A numpy array representing the detection scores of the
          detections kept after NMS, excluding those matched to a difficult
          ground truth box.
      tp_fp_labels: a boolean numpy array, aligned with scores, indicating
          whether a detection is a true positive.
    """
    if detected_boxes.size == 0:
      return np.array([], dtype=float), np.array([], dtype=bool)

    detected_boxlist = np_box_list.BoxList(detected_boxes)
    detected_boxlist.add_field('scores', detected_scores)
    detected_boxlist = np_box_list_ops.non_max_suppression(
        detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
    scores = detected_boxlist.get_field('scores')
    num_detections = detected_boxlist.num_boxes()

    # With no ground truth of this class every surviving detection is a
    # false positive.
    if groundtruth_boxes.size == 0:
      return scores, np.zeros(num_detections, dtype=bool)

    gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
    iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
    max_overlap_gt_ids = np.argmax(iou, axis=1)
    is_gt_box_detected = np.zeros(gt_boxlist.num_boxes(), dtype=bool)
    tp_fp_labels = np.zeros(num_detections, dtype=bool)
    is_matched_to_difficult_box = np.zeros(num_detections, dtype=bool)
    # NOTE(review): detections are visited in the order returned by
    # non_max_suppression, presumably highest score first, so the best-scoring
    # detection claims a ground truth box - confirm against np_box_list_ops.
    for i in range(num_detections):
      gt_id = max_overlap_gt_ids[i]
      # Keep the `>=` form: a NaN IOU must count as "not matched".
      if iou[i, gt_id] >= self.matching_iou_threshold:
        if groundtruth_is_difficult_list[gt_id]:
          is_matched_to_difficult_box[i] = True
        elif not is_gt_box_detected[gt_id]:
          tp_fp_labels[i] = True
          is_gt_box_detected[gt_id] = True

    # Detections matched to difficult boxes are neither tp nor fp; drop them.
    keep = ~is_matched_to_difficult_box
    return scores[keep], tp_fp_labels[keep]