Commit a50c71dd authored by ChaimZhu's avatar ChaimZhu
Browse files

[Fix] fix metric bug (#1679)

* fix metric bug

* fix comments

* fix comments
parent 66d883f2
...@@ -289,7 +289,7 @@ class NuScenesMetric(BaseMetric): ...@@ -289,7 +289,7 @@ class NuScenesMetric(BaseMetric):
print(f'\nFormating bboxes of {name}') print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results] results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name) tmp_file_ = osp.join(jsonfile_prefix, name)
box_type_3d = type(results_[0]['bbox_3d']) box_type_3d = type(results_[0]['bboxes_3d'])
if box_type_3d == LiDARInstance3DBoxes: if box_type_3d == LiDARInstance3DBoxes:
result_dict[name] = self._format_lidar_bbox( result_dict[name] = self._format_lidar_bbox(
results_, sample_id_list, classes, tmp_file_) results_, sample_id_list, classes, tmp_file_)
...@@ -299,6 +299,53 @@ class NuScenesMetric(BaseMetric): ...@@ -299,6 +299,53 @@ class NuScenesMetric(BaseMetric):
return result_dict, tmp_dir return result_dict, tmp_dir
def get_attr_name(self, attr_idx, label_name):
    """Get attribute from predicted index.

    This is a workaround to predict attribute when the predicted velocity
    is not reliable. We map the predicted attribute index to the one
    in the attribute set. If it is consistent with the category, we will
    keep it. Otherwise, we will use the default attribute.

    Args:
        attr_idx (int): Attribute index.
        label_name (str): Predicted category name.

    Returns:
        str: Predicted attribute name.
    """
    # Index -> attribute-name table (order fixed by the training labels).
    attr_names = [
        'cycle.with_rider', 'cycle.without_rider', 'pedestrian.moving',
        'pedestrian.standing', 'pedestrian.sitting_lying_down',
        'vehicle.moving', 'vehicle.parked', 'vehicle.stopped', 'None'
    ]
    # Attributes that are consistent with each category; any category not
    # listed here (e.g. barrier, traffic_cone) always falls back to its
    # default attribute.
    valid_attrs = {
        'car': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'bus': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'truck': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'trailer': ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'construction_vehicle':
        ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'),
        'pedestrian': ('pedestrian.moving', 'pedestrian.standing',
                       'pedestrian.sitting_lying_down'),
        'bicycle': ('cycle.with_rider', 'cycle.without_rider'),
        'motorcycle': ('cycle.with_rider', 'cycle.without_rider'),
    }
    pred_attr = attr_names[attr_idx]
    if pred_attr in valid_attrs.get(label_name, ()):
        return pred_attr
    return self.DefaultAttribute[label_name]
def _format_camera_bbox(self, def _format_camera_bbox(self,
results: List[dict], results: List[dict],
sample_id_list: List[int], sample_id_list: List[int],
...@@ -335,6 +382,7 @@ class NuScenesMetric(BaseMetric): ...@@ -335,6 +382,7 @@ class NuScenesMetric(BaseMetric):
sample_id = sample_id_list[i] sample_id = sample_id_list[i]
frame_sample_id = sample_id // CAM_NUM
camera_type_id = sample_id % CAM_NUM camera_type_id = sample_id % CAM_NUM
if camera_type_id == 0: if camera_type_id == 0:
...@@ -344,19 +392,19 @@ class NuScenesMetric(BaseMetric): ...@@ -344,19 +392,19 @@ class NuScenesMetric(BaseMetric):
# need to merge results from images of the same sample # need to merge results from images of the same sample
annos = [] annos = []
boxes, attrs = output_to_nusc_box(det) boxes, attrs = output_to_nusc_box(det)
sample_token = self.data_infos[sample_id]['token'] sample_token = self.data_infos[frame_sample_id]['token']
camera_type = camera_types[camera_type_id] camera_type = camera_types[camera_type_id]
boxes, attrs = cam_nusc_box_to_global( boxes, attrs = cam_nusc_box_to_global(
self.data_infos[sample_id - camera_type_id], boxes, attrs, self.data_infos[frame_sample_id], boxes, attrs, classes,
camera_type, classes, self.eval_detection_configs) self.eval_detection_configs, camera_type)
boxes_per_frame.extend(boxes) boxes_per_frame.extend(boxes)
attrs_per_frame.extend(attrs) attrs_per_frame.extend(attrs)
# Remove redundant predictions caused by overlap of images # Remove redundant predictions caused by overlap of images
if (sample_id + 1) % CAM_NUM != 0: if (sample_id + 1) % CAM_NUM != 0:
continue continue
boxes = global_nusc_box_to_cam( boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_id],
self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame, boxes_per_frame, classes,
classes, self.eval_detection_configs) self.eval_detection_configs)
cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes) cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
# box nms 3d over 6 images in a frame # box nms 3d over 6 images in a frame
# TODO: move this global setting into config # TODO: move this global setting into config
...@@ -386,8 +434,8 @@ class NuScenesMetric(BaseMetric): ...@@ -386,8 +434,8 @@ class NuScenesMetric(BaseMetric):
det = bbox3d2result(cam_boxes3d, scores, labels, attrs) det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
boxes, attrs = output_to_nusc_box(det) boxes, attrs = output_to_nusc_box(det)
boxes, attrs = cam_nusc_box_to_global( boxes, attrs = cam_nusc_box_to_global(
self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs, self.data_infos[frame_sample_id], boxes, attrs, classes,
classes, self.eval_detection_configs) self.eval_detection_configs)
for i, box in enumerate(boxes): for i, box in enumerate(boxes):
name = classes[box.label] name = classes[box.label]
...@@ -500,14 +548,14 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]: ...@@ -500,14 +548,14 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
Args: Args:
detection (dict): Detection results. detection (dict): Detection results.
- bbox_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. - bboxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores. - scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels. - labels_3d (torch.Tensor): Predicted box labels.
Returns: Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes. list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
""" """
bbox3d = detection['bbox_3d'] bbox3d = detection['bboxes_3d']
scores = detection['scores_3d'].numpy() scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy() labels = detection['labels_3d'].numpy()
attrs = None attrs = None
...@@ -603,10 +651,14 @@ def lidar_nusc_box_to_global( ...@@ -603,10 +651,14 @@ def lidar_nusc_box_to_global(
return box_list return box_list
def cam_nusc_box_to_global(info: dict, boxes: List[NuScenesBox], def cam_nusc_box_to_global(
attrs: List[str], camera_type: str, info: dict,
classes: List[str], boxes: List[NuScenesBox],
eval_configs: DetectionConfig) -> List[NuScenesBox]: attrs: List[str],
classes: List[str],
eval_configs: DetectionConfig,
camera_type: str = 'CAM_FRONT',
) -> List[NuScenesBox]:
"""Convert the box from camera to global coordinate. """Convert the box from camera to global coordinate.
Args: Args:
...@@ -678,7 +730,7 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox], ...@@ -678,7 +730,7 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
continue continue
# Move box to camera coord system # Move box to camera coord system
cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego']) cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego'])
box.translate(-cam2ego[:3, :3]) box.translate(-cam2ego[:3, 3])
box.rotate( box.rotate(
pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05, pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05,
atol=1e-07).inverse) atol=1e-07).inverse)
......
...@@ -172,7 +172,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta): ...@@ -172,7 +172,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
(num_instances, ) (num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (BaseInstance3DBoxes): Prediction of bboxes, - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where contains a tensor with shape (num_instances, C), where
C >= 7. C >= 7.
""" """
...@@ -222,7 +222,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta): ...@@ -222,7 +222,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
(num_instances, ) (num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (BaseInstance3DBoxes): Prediction of bboxes, - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where contains a tensor with shape (num_instances, C), where
C >= 7. C >= 7.
""" """
......
...@@ -702,14 +702,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -702,14 +702,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
# Due to the ground truth centers_2d are the gravity center of objects # Due to the ground truth centers_2d are the gravity center of objects
# v0.10.0 fix inplace operation to the input tensor of cam_box3d # v0.10.0 fix inplace operation to the input tensor of cam_box3d
# So here we also need to add origin=(0.5, 0.5, 0.5) # So here we also need to add origin=(0.5, 0.5, 0.5)
if not self.pred_attrs:
attrs = None
results = InstanceData() results = InstanceData()
results.bboxes_3d = bboxes results.bboxes_3d = bboxes
results.scores_3d = scores results.scores_3d = scores
results.labels_3d = labels results.labels_3d = labels
results.attr_labels = attrs if self.pred_attrs and attrs is not None:
results.attr_labels = attrs
return results return results
......
...@@ -1139,7 +1139,7 @@ class PGDHead(FCOSMono3DHead): ...@@ -1139,7 +1139,7 @@ class PGDHead(FCOSMono3DHead):
points (list[Tensor]): Points of each fpn level, each has shape points (list[Tensor]): Points of each fpn level, each has shape
(num_points, 2). (num_points, 2).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bbox_3d``、 gt_instance_3d. It usually includes ``bboxes_3d``、
``labels_3d``、``depths``、``centers_2d`` and attributes. ``labels_3d``、``depths``、``centers_2d`` and attributes.
batch_gt_instances (list[:obj:`InstanceData`]): Batch of batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes``、``labels``. gt_instance. It usually includes ``bboxes``、``labels``.
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Union from typing import List, Optional, Union
from mmengine import InstanceData
from mmdet3d.registry import MODELS from mmdet3d.registry import MODELS
from mmdet3d.structures import Det3DDataSample from mmdet3d.structures import Det3DDataSample
from mmdet3d.structures.det3d_data_sample import (ForwardResults, from mmdet3d.structures.det3d_data_sample import (ForwardResults,
...@@ -114,7 +112,7 @@ class Base3DDetector(BaseDetector): ...@@ -114,7 +112,7 @@ class Base3DDetector(BaseDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of 3D bboxes, has a shape - labels_3d (Tensor): Labels of 3D bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
When there are image prediction in some models, it should When there are image prediction in some models, it should
contains `pred_instances`, And the ``pred_instances`` normally contains `pred_instances`, And the ``pred_instances`` normally
...@@ -133,17 +131,11 @@ class Base3DDetector(BaseDetector): ...@@ -133,17 +131,11 @@ class Base3DDetector(BaseDetector):
(results_list_3d is not None),\ (results_list_3d is not None),\
'please pass at least one type of results_list' 'please pass at least one type of results_list'
if results_list_2d is None:
results_list_2d = [
InstanceData() for _ in range(len(results_list_3d))
]
if results_list_3d is None:
results_list_3d = [
InstanceData() for _ in range(len(results_list_2d))
]
for i in range(len(results_list_3d)): for i in range(len(results_list_3d)):
result = Det3DDataSample() result = Det3DDataSample()
result.pred_instances_3d = results_list_3d[i] if results_list_3d is not None:
result.pred_instances = results_list_2d[i] result.pred_instances_3d = results_list_3d[i]
if results_list_2d is not None:
result.pred_instances = results_list_2d[i]
data_sample_list.append(result) data_sample_list.append(result)
return data_sample_list return data_sample_list
...@@ -75,7 +75,7 @@ class GroupFree3DNet(SingleStage3DDetector): ...@@ -75,7 +75,7 @@ class GroupFree3DNet(SingleStage3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
x = self.extract_feat(batch_inputs_dict) x = self.extract_feat(batch_inputs_dict)
......
...@@ -103,7 +103,7 @@ class SingleStage3DDetector(Base3DDetector): ...@@ -103,7 +103,7 @@ class SingleStage3DDetector(Base3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
x = self.extract_feat(batch_inputs_dict) x = self.extract_feat(batch_inputs_dict)
......
...@@ -143,7 +143,7 @@ class TwoStage3DDetector(Base3DDetector): ...@@ -143,7 +143,7 @@ class TwoStage3DDetector(Base3DDetector):
(num_instance, ) (num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape - labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ). (num_instances, ).
- bbox_3d (Tensor): Contains a tensor with shape - bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7. (num_instances, C) where C >=7.
""" """
feats_dict = self.extract_feat(batch_inputs_dict) feats_dict = self.extract_feat(batch_inputs_dict)
......
...@@ -112,7 +112,7 @@ class Det3DDataSample(DetDataSample): ...@@ -112,7 +112,7 @@ class Det3DDataSample(DetDataSample):
>>> assert 'pred_instances' in data_sample >>> assert 'pred_instances' in data_sample
>>> pred_instances_3d = InstanceData(metainfo=meta_info) >>> pred_instances_3d = InstanceData(metainfo=meta_info)
>>> pred_instances_3d.bbox_3d = BaseInstance3DBoxes(torch.rand((5, 7))) >>> pred_instances_3d.bboxes_3d = BaseInstance3DBoxes(torch.rand((5, 7)))
>>> pred_instances_3d.scores_3d = torch.rand((5, )) >>> pred_instances_3d.scores_3d = torch.rand((5, ))
>>> pred_instances_3d.labels_3d = torch.rand((5, )) >>> pred_instances_3d.labels_3d = torch.rand((5, ))
>>> data_sample = Det3DDataSample(pred_instances_3d=pred_instances_3d) >>> data_sample = Det3DDataSample(pred_instances_3d=pred_instances_3d)
......
...@@ -67,11 +67,11 @@ def bbox3d2result(bboxes, scores, labels, attrs=None): ...@@ -67,11 +67,11 @@ def bbox3d2result(bboxes, scores, labels, attrs=None):
- attrs_3d (torch.Tensor, optional): Box attributes. - attrs_3d (torch.Tensor, optional): Box attributes.
""" """
result_dict = dict( result_dict = dict(
boxes_3d=bboxes.to('cpu'), bboxes_3d=bboxes.to('cpu'),
scores_3d=scores.cpu(), scores_3d=scores.cpu(),
labels_3d=labels.cpu()) labels_3d=labels.cpu())
if attrs is not None: if attrs is not None:
result_dict['attrs_3d'] = attrs.cpu() result_dict['attr_labels'] = attrs.cpu()
return result_dict return result_dict
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment