Commit a50c71dd authored by ChaimZhu's avatar ChaimZhu
Browse files

[Fix] fix metric bug (#1679)

* fix metric bug

* fix comments

* fix comments
parent 66d883f2
...@@ -289,7 +289,7 @@ class NuScenesMetric(BaseMetric): ...@@ -289,7 +289,7 @@ class NuScenesMetric(BaseMetric):
print(f'\nFormating bboxes of {name}') print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results] results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name) tmp_file_ = osp.join(jsonfile_prefix, name)
box_type_3d = type(results_[0]['bbox_3d']) box_type_3d = type(results_[0]['bboxes_3d'])
if box_type_3d == LiDARInstance3DBoxes: if box_type_3d == LiDARInstance3DBoxes:
result_dict[name] = self._format_lidar_bbox( result_dict[name] = self._format_lidar_bbox(
results_, sample_id_list, classes, tmp_file_) results_, sample_id_list, classes, tmp_file_)
...@@ -299,6 +299,53 @@ class NuScenesMetric(BaseMetric): ...@@ -299,6 +299,53 @@ class NuScenesMetric(BaseMetric):
return result_dict, tmp_dir return result_dict, tmp_dir
def get_attr_name(self, attr_idx, label_name):
    """Get attribute from predicted index.

    This is a workaround to predict attribute when the predicted velocity
    is not reliable. We map the predicted attribute index to the one
    in the attribute set. If it is consistent with the category, we will
    keep it. Otherwise, we will use the default attribute.

    Args:
        attr_idx (int): Attribute index.
        label_name (str): Predicted category name.

    Returns:
        str: Predicted attribute name.
    """
    # Index -> attribute-name table (order fixed by the training labels).
    attr_names = [
        'cycle.with_rider', 'cycle.without_rider', 'pedestrian.moving',
        'pedestrian.standing', 'pedestrian.sitting_lying_down',
        'vehicle.moving', 'vehicle.parked', 'vehicle.stopped', 'None'
    ]
    # Attributes that are consistent with each category; any category not
    # listed here (e.g. barrier, traffic_cone) always falls back to its
    # default attribute.
    valid_attrs = {
        'car': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'bus': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'truck': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'trailer': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'construction_vehicle':
        ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'pedestrian': ('pedestrian.moving', 'pedestrian.standing',
                       'pedestrian.sitting_lying_down'),
        'bicycle': ('cycle.with_rider', 'cycle.without_rider'),
        'motorcycle': ('cycle.with_rider', 'cycle.without_rider'),
    }
    pred_attr = attr_names[attr_idx]
    if pred_attr in valid_attrs.get(label_name, ()):
        return pred_attr
    return self.DefaultAttribute[label_name]
def _format_camera_bbox(self, def _format_camera_bbox(self,
results: List[dict], results: List[dict],
sample_id_list: List[int], sample_id_list: List[int],
...@@ -335,6 +382,7 @@ class NuScenesMetric(BaseMetric): ...@@ -335,6 +382,7 @@ class NuScenesMetric(BaseMetric):
sample_id = sample_id_list[i] sample_id = sample_id_list[i]
frame_sample_id = sample_id // CAM_NUM
camera_type_id = sample_id % CAM_NUM camera_type_id = sample_id % CAM_NUM
if camera_type_id == 0: if camera_type_id == 0:
...@@ -344,19 +392,19 @@ class NuScenesMetric(BaseMetric): ...@@ -344,19 +392,19 @@ class NuScenesMetric(BaseMetric):
# need to merge results from images of the same sample # need to merge results from images of the same sample
annos = [] annos = []
boxes, attrs = output_to_nusc_box(det) boxes, attrs = output_to_nusc_box(det)
sample_token = self.data_infos[sample_id]['token'] sample_token = self.data_infos[frame_sample_id]['token']
camera_type = camera_types[camera_type_id] camera_type = camera_types[camera_type_id]
boxes, attrs = cam_nusc_box_to_global( boxes, attrs = cam_nusc_box_to_global(
self.data_infos[sample_id - camera_type_id], boxes, attrs, self.data_infos[frame_sample_id], boxes, attrs, classes,
camera_type, classes, self.eval_detection_configs) self.eval_detection_configs, camera_type)
boxes_per_frame.extend(boxes) boxes_per_frame.extend(boxes)
attrs_per_frame.extend(attrs) attrs_per_frame.extend(attrs)
# Remove redundant predictions caused by overlap of images # Remove redundant predictions caused by overlap of images
if (sample_id + 1) % CAM_NUM != 0: if (sample_id + 1) % CAM_NUM != 0:
continue continue
boxes = global_nusc_box_to_cam( boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_id],
self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame, boxes_per_frame, classes,
classes, self.eval_detection_configs) self.eval_detection_configs)
cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes) cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
# box nms 3d over 6 images in a frame # box nms 3d over 6 images in a frame
# TODO: move this global setting into config # TODO: move this global setting into config
...@@ -386,8 +434,8 @@ class NuScenesMetric(BaseMetric): ...@@ -386,8 +434,8 @@ class NuScenesMetric(BaseMetric):
det = bbox3d2result(cam_boxes3d, scores, labels, attrs) det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
boxes, attrs = output_to_nusc_box(det) boxes, attrs = output_to_nusc_box(det)
boxes, attrs = cam_nusc_box_to_global( boxes, attrs = cam_nusc_box_to_global(
self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs, self.data_infos[frame_sample_id], boxes, attrs, classes,
classes, self.eval_detection_configs) self.eval_detection_configs)
for i, box in enumerate(boxes): for i, box in enumerate(boxes):
name = classes[box.label] name = classes[box.label]
...@@ -500,14 +548,14 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]: ...@@ -500,14 +548,14 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
Args: Args:
detection (dict): Detection results. detection (dict): Detection results.
- bbox_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. - bboxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores. - scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels. - labels_3d (torch.Tensor): Predicted box labels.
Returns: Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes. list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
""" """
bbox3d = detection['bbox_3d'] bbox3d = detection['bboxes_3d']
scores = detection['scores_3d'].numpy() scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy() labels = detection['labels_3d'].numpy()
attrs = None attrs = None
...@@ -603,10 +651,14 @@ def lidar_nusc_box_to_global( ...@@ -603,10 +651,14 @@ def lidar_nusc_box_to_global(
return box_list return box_list
def cam_nusc_box_to_global(info: dict, boxes: List[NuScenesBox], def cam_nusc_box_to_global(
attrs: List[str], camera_type: str, info: dict,
classes: List[str], boxes: List[NuScenesBox],
eval_configs: DetectionConfig) -> List[NuScenesBox]: attrs: List[str],
classes: List[str],
eval_configs: DetectionConfig,
camera_type: str = 'CAM_FRONT',
) -> List[NuScenesBox]:
"""Convert the box from camera to global coordinate. """Convert the box from camera to global coordinate.
Args: Args:
...@@ -678,7 +730,7 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox], ...@@ -678,7 +730,7 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
continue continue
# Move box to camera coord system # Move box to camera coord system
cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego']) cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego'])
box.translate(-cam2ego[:3, :3]) box.translate(-cam2ego[:3, 3])
box.rotate( box.rotate(
pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05, pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05,
atol=1e-07).inverse) atol=1e-07).inverse)
......
...@@ -172,7 +172,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta): ...@@ -172,7 +172,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
(num_instances, ) (num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (BaseInstance3DBoxes): Prediction of bboxes, - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where contains a tensor with shape (num_instances, C), where
C >= 7. C >= 7.
""" """
...@@ -222,7 +222,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta): ...@@ -222,7 +222,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
(num_instances, ) (num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (BaseInstance3DBoxes): Prediction of bboxes, - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where contains a tensor with shape (num_instances, C), where
C >= 7. C >= 7.
""" """
......
...@@ -702,14 +702,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -702,14 +702,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
# Due to the ground truth centers_2d are the gravity center of objects # Due to the ground truth centers_2d are the gravity center of objects
# v0.10.0 fix inplace operation to the input tensor of cam_box3d # v0.10.0 fix inplace operation to the input tensor of cam_box3d
# So here we also need to add origin=(0.5, 0.5, 0.5) # So here we also need to add origin=(0.5, 0.5, 0.5)
if not self.pred_attrs:
attrs = None
results = InstanceData() results = InstanceData()
results.bboxes_3d = bboxes results.bboxes_3d = bboxes
results.scores_3d = scores results.scores_3d = scores
results.labels_3d = labels results.labels_3d = labels
results.attr_labels = attrs if self.pred_attrs and attrs is not None:
results.attr_labels = attrs
return results return results
......
...@@ -1139,7 +1139,7 @@ class PGDHead(FCOSMono3DHead): ...@@ -1139,7 +1139,7 @@ class PGDHead(FCOSMono3DHead):
points (list[Tensor]): Points of each fpn level, each has shape points (list[Tensor]): Points of each fpn level, each has shape
(num_points, 2). (num_points, 2).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bbox_3d``、 gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes. ``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``. gt_instance. It usually includes ``bboxes``、``labels``.
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Union from typing import List, Optional, Union
from mmengine import InstanceData
from mmdet3d.registry import MODELS from mmdet3d.registry import MODELS
from mmdet3d.structures import Det3DDataSample from mmdet3d.structures import Det3DDataSample
from mmdet3d.structures.det3d_data_sample import (ForwardResults, from mmdet3d.structures.det3d_data_sample import (ForwardResults,
...@@ -114,7 +112,7 @@ class Base3DDetector(BaseDetector): ...@@ -114,7 +112,7 @@ class Base3DDetector(BaseDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of 3D bboxes, has a shape - labels_3d (Tensor): Labels of 3D bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
When there are image prediction in some models, it should When there are image prediction in some models, it should
contains `pred_instances`, And the ``pred_instances`` normally contains `pred_instances`, And the ``pred_instances`` normally
...@@ -133,17 +131,11 @@ class Base3DDetector(BaseDetector): ...@@ -133,17 +131,11 @@ class Base3DDetector(BaseDetector):
(results_list_3d is not None),\ (results_list_3d is not None),\
'please pass at least one type of results_list' 'please pass at least one type of results_list'
if results_list_2d is None:
results_list_2d = [
InstanceData() for _ in range(len(results_list_3d))
]
if results_list_3d is None:
results_list_3d = [
InstanceData() for _ in range(len(results_list_2d))
]
for i in range(len(results_list_3d)): for i in range(len(results_list_3d)):
result = Det3DDataSample() result = Det3DDataSample()
result.pred_instances_3d = results_list_3d[i] if results_list_3d is not None:
result.pred_instances = results_list_2d[i] result.pred_instances_3d = results_list_3d[i]
if results_list_2d is not None:
result.pred_instances = results_list_2d[i]
data_sample_list.append(result) data_sample_list.append(result)
return data_sample_list return data_sample_list
...@@ -75,7 +75,7 @@ class GroupFree3DNet(SingleStage3DDetector): ...@@ -75,7 +75,7 @@ class GroupFree3DNet(SingleStage3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
x = self.extract_feat(batch_inputs_dict) x = self.extract_feat(batch_inputs_dict)
......
...@@ -103,7 +103,7 @@ class SingleStage3DDetector(Base3DDetector): ...@@ -103,7 +103,7 @@ class SingleStage3DDetector(Base3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
x = self.extract_feat(batch_inputs_dict) x = self.extract_feat(batch_inputs_dict)
......
...@@ -143,7 +143,7 @@ class TwoStage3DDetector(Base3DDetector): ...@@ -143,7 +143,7 @@ class TwoStage3DDetector(Base3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
feats_dict = self.extract_feat(batch_inputs_dict) feats_dict = self.extract_feat(batch_inputs_dict)
......
...@@ -112,7 +112,7 @@ class Det3DDataSample(DetDataSample): ...@@ -112,7 +112,7 @@ class Det3DDataSample(DetDataSample):
>>> assert 'pred_instances' in data_sample >>> assert 'pred_instances' in data_sample
>>> pred_instances_3d = InstanceData(metainfo=meta_info) >>> pred_instances_3d = InstanceData(metainfo=meta_info)
>>> pred_instances_3d.bbox_3d = BaseInstance3DBoxes(torch.rand((5, 7))) >>> pred_instances_3d.bboxes_3d = BaseInstance3DBoxes(torch.rand((5, 7)))
>>> pred_instances_3d.scores_3d = torch.rand((5, )) >>> pred_instances_3d.scores_3d = torch.rand((5, ))
>>> pred_instances_3d.labels_3d = torch.rand((5, )) >>> pred_instances_3d.labels_3d = torch.rand((5, ))
>>> data_sample = Det3DDataSample(pred_instances_3d=pred_instances_3d) >>> data_sample = Det3DDataSample(pred_instances_3d=pred_instances_3d)
......
...@@ -67,11 +67,11 @@ def bbox3d2result(bboxes, scores, labels, attrs=None): ...@@ -67,11 +67,11 @@ def bbox3d2result(bboxes, scores, labels, attrs=None):
- attrs_3d (torch.Tensor, optional): Box attributes. - attrs_3d (torch.Tensor, optional): Box attributes.
""" """
result_dict = dict( result_dict = dict(
boxes_3d=bboxes.to('cpu'), bboxes_3d=bboxes.to('cpu'),
scores_3d=scores.cpu(), scores_3d=scores.cpu(),
labels_3d=labels.cpu()) labels_3d=labels.cpu())
if attrs is not None: if attrs is not None:
result_dict['attrs_3d'] = attrs.cpu() result_dict['attr_labels'] = attrs.cpu()
return result_dict return result_dict
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment