[Enhance]: Add format_only option for nuScenes and Waymo evaluation (#2151)

* Support submitting test-set results on the nuScenes and Waymo datasets * Fix typo

[Enhance]: Add format_only option for nuScenes and Waymo evaluation (#2151)
* Support submitting test-set results on the nuScenes and Waymo datasets * Fix typo
8bf2f5a4 · Xiang Xu · GitHub · ed081770 · 8bf2f5a4 · 8bf2f5a4
Unverified Commit 8bf2f5a4 authored Jan 18, 2023 by Xiang Xu Committed by GitHub Jan 18, 2023
4 changed files
--- a/configs/_base_/datasets/kitti-mono3d.py
+++ b/configs/_base_/datasets/kitti-mono3d.py
@@ -80,8 +80,7 @@ test_dataloader = val_dataloader
 val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
-    metric='bbox',
-    pred_box_type_3d='Camera')
+    metric='bbox')

 test_evaluator = val_evaluator


--- a/mmdet3d/evaluation/metrics/kitti_metric.py
+++ b/mmdet3d/evaluation/metrics/kitti_metric.py
 # Copyright (c) OpenMMLab. All rights reserved.
 import tempfile
 from os import path as osp
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union

 import mmengine
 import numpy as np
@@ -22,44 +22,49 @@ class KittiMetric(BaseMetric):

    Args:
        ann_file (str): Annotation file path.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
-        pcd_limit_range (list): The range of point cloud used to
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'bbox'.
+        pcd_limit_range (List[float]): The range of point cloud used to
            filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [0, -40, -3, 70.4, 40, 0.0].
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
+            If not specified, a temp file will be created. Defaults to None.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM2'
        format_only (bool): Format the output results without perform
            evaluation. It is useful when you want to format the result
            to a specific format and submit it to the test server.
            Defaults to False.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
-            Default: None.
+            Defaults to None.
        collect_device (str): Device name used for collecting results
            from different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
    """

-    def __init__(self,
+    def __init__(
+        self,
        ann_file: str,
        metric: Union[str, List[str]] = 'bbox',
-                 pred_box_type_3d: str = 'LiDAR',
        pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
        prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
+        pklfile_prefix: Optional[str] = None,
        default_cam_key: str = 'CAM2',
        format_only: bool = False,
-                 submission_prefix: str = None,
+        submission_prefix: Optional[str] = None,
        collect_device: str = 'cpu',
-                 file_client_args: dict = dict(backend='disk')):
+        file_client_args: dict = dict(backend='disk')
+    ) -> None:
        self.default_prefix = 'Kitti metric'
        super(KittiMetric, self).__init__(
            collect_device=collect_device, prefix=prefix)
@@ -68,25 +73,23 @@ class KittiMetric(BaseMetric):
        self.pklfile_prefix = pklfile_prefix
        self.format_only = format_only
        if self.format_only:
-            assert submission_prefix is not None, 'submission_prefix must be'
-            'not None when format_only is True, otherwise the result files'
-            'will be saved to a temp directory which will be cleaned up at'
+            assert submission_prefix is not None, 'submission_prefix must be '
+            'not None when format_only is True, otherwise the result files '
+            'will be saved to a temp directory which will be cleaned up at '
            'the end.'

        self.submission_prefix = submission_prefix
-        self.pred_box_type_3d = pred_box_type_3d
        self.default_cam_key = default_cam_key
        self.file_client_args = file_client_args
-        self.default_cam_key = default_cam_key

        allowed_metrics = ['bbox', 'img_bbox', 'mAP', 'LET_mAP']
        self.metrics = metric if isinstance(metric, list) else [metric]
        for metric in self.metrics:
            if metric not in allowed_metrics:
                raise KeyError("metric should be one of 'bbox', 'img_bbox', "
-                               'but got {metric}.')
+                               f'but got {metric}.')

-    def convert_annos_to_kitti_annos(self, data_infos: dict) -> list:
+    def convert_annos_to_kitti_annos(self, data_infos: dict) -> List[dict]:
        """Convert loading annotations to Kitti annotations.

        Args:
@@ -171,11 +174,11 @@ class KittiMetric(BaseMetric):
            result['sample_idx'] = sample_idx
            self.results.append(result)

-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
-            results (list): The processed results of the whole dataset.
+            results (List[dict]): The processed results of the whole dataset.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
@@ -220,25 +223,25 @@ class KittiMetric(BaseMetric):
        return metric_dict

    def kitti_evaluate(self,
-                       results_dict: List[dict],
+                       results_dict: dict,
                       gt_annos: List[dict],
-                       metric: str = None,
-                       classes: List[str] = None,
-                       logger: MMLogger = None) -> dict:
+                       metric: Optional[str] = None,
+                       classes: Optional[List[str]] = None,
+                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
        """Evaluation in KITTI protocol.

        Args:
            results_dict (dict): Formatted results of the dataset.
-            gt_annos (list[dict]): Contain gt information of each sample.
+            gt_annos (List[dict]): Contain gt information of each sample.
            metric (str, optional): Metrics to be evaluated.
-                Default: None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                related information during evaluation. Defaults to None.

        Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
        """
        ap_dict = dict()
        for name in results_dict:
@@ -249,32 +252,33 @@ class KittiMetric(BaseMetric):
            ap_result_str, ap_dict_ = kitti_eval(
                gt_annos, results_dict[name], classes, eval_types=eval_types)
            for ap_type, ap in ap_dict_.items():
-                ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+                ap_dict[f'{name}/{ap_type}'] = float(f'{ap:.4f}')

            print_log(f'Results of {name}:\n' + ap_result_str, logger=logger)

        return ap_dict

-    def format_results(self,
+    def format_results(
+        self,
        results: List[dict],
-                       pklfile_prefix: str = None,
-                       submission_prefix: str = None,
-                       classes: List[str] = None):
+        pklfile_prefix: Optional[str] = None,
+        submission_prefix: Optional[str] = None,
+        classes: Optional[List[str]] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
        """Format the results to pkl file.

        Args:
-            results (list[dict]): Testing results of the
-                dataset.
+            results (List[dict]): Testing results of the dataset.
            pklfile_prefix (str, optional): The prefix of pkl files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
            submission_prefix (str, optional): The prefix of submitted files.
                It includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.

        Returns:
            tuple: (result_dict, tmp_dir), result_dict is a dict containing
@@ -287,7 +291,7 @@ class KittiMetric(BaseMetric):
        else:
            tmp_dir = None
        result_dict = dict()
-        sample_id_list = [result['sample_idx'] for result in results]
+        sample_idx_list = [result['sample_idx'] for result in results]
        for name in results[0]:
            if submission_prefix is not None:
                submission_prefix_ = osp.join(submission_prefix, name)
@@ -301,7 +305,7 @@ class KittiMetric(BaseMetric):
                    0] != '_' and results[0][name]:
                net_outputs = [result[name] for result in results]
                result_list_ = self.bbox2result_kitti(net_outputs,
-                                                      sample_id_list, classes,
+                                                      sample_idx_list, classes,
                                                      pklfile_prefix_,
                                                      submission_prefix_)
                result_dict[name] = result_list_
@@ -309,32 +313,33 @@ class KittiMetric(BaseMetric):
                    name]:
                net_outputs = [result[name] for result in results]
                result_list_ = self.bbox2result_kitti2d(
-                    net_outputs, sample_id_list, classes, pklfile_prefix_,
+                    net_outputs, sample_idx_list, classes, pklfile_prefix_,
                    submission_prefix_)
                result_dict[name] = result_list_
        return result_dict, tmp_dir

-    def bbox2result_kitti(self,
-                          net_outputs: list,
-                          sample_id_list: list,
-                          class_names: list,
-                          pklfile_prefix: str = None,
-                          submission_prefix: str = None):
+    def bbox2result_kitti(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
        """Convert 3D detection results to kitti format for evaluation and test
        submission.

        Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
            pklfile_prefix (str, optional): The prefix of pkl file.
                Defaults to None.
            submission_prefix (str, optional): The prefix of submission file.
                Defaults to None.

        Returns:
-            list[dict]: A list of dictionaries with the kitti format.
+            List[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
@@ -345,8 +350,7 @@ class KittiMetric(BaseMetric):
        print('\nConverting 3D prediction to KITTI format')
        for idx, pred_dicts in enumerate(
                mmengine.track_iter_progress(net_outputs)):
-            annos = []
-            sample_idx = sample_id_list[idx]
+            sample_idx = sample_idx_list[idx]
            info = self.data_infos[sample_idx]
            # Here default used 'CAM2' to compute metric. If you want to
            # use another camera, please modify it.
@@ -393,7 +397,6 @@ class KittiMetric(BaseMetric):
                    anno['score'].append(score)

                anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
            else:
                anno = {
                    'name': np.array([]),
@@ -406,7 +409,6 @@ class KittiMetric(BaseMetric):
                    'rotation_y': np.array([]),
                    'score': np.array([]),
                }
-                annos.append(anno)

            if submission_prefix is not None:
                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
@@ -428,10 +430,10 @@ class KittiMetric(BaseMetric):
                                anno['score'][idx]),
                            file=f)

-            annos[-1]['sample_id'] = np.array(
-                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+            anno['sample_idx'] = np.array(
+                [sample_idx] * len(anno['score']), dtype=np.int64)

-            det_annos += annos
+            det_annos.append(anno)

        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -443,27 +445,28 @@ class KittiMetric(BaseMetric):

        return det_annos

-    def bbox2result_kitti2d(self,
-                            net_outputs: list,
-                            sample_id_list,
-                            class_names: list,
-                            pklfile_prefix: str = None,
-                            submission_prefix: str = None):
+    def bbox2result_kitti2d(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
        """Convert 2D detection results to kitti format for evaluation and test
        submission.

        Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
            pklfile_prefix (str, optional): The prefix of pkl file.
                Defaults to None.
            submission_prefix (str, optional): The prefix of submission file.
                Defaults to None.

        Returns:
-            list[dict]: A list of dictionaries have the kitti format
+            List[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
@@ -471,7 +474,6 @@ class KittiMetric(BaseMetric):
        print('\nConverting 2D prediction to KITTI format')
        for i, bboxes_per_sample in enumerate(
                mmengine.track_iter_progress(net_outputs)):
-            annos = []
            anno = dict(
                name=[],
                truncated=[],
@@ -482,7 +484,7 @@ class KittiMetric(BaseMetric):
                location=[],
                rotation_y=[],
                score=[])
-            sample_idx = sample_id_list[i]
+            sample_idx = sample_idx_list[i]

            num_example = 0
            bbox = bboxes_per_sample['bboxes']
@@ -504,8 +506,7 @@ class KittiMetric(BaseMetric):
                num_example += 1

            if num_example == 0:
-                annos.append(
-                    dict(
+                anno = dict(
                    name=np.array([]),
                    truncated=np.array([]),
                    occluded=np.array([]),
@@ -515,14 +516,13 @@ class KittiMetric(BaseMetric):
                    location=np.zeros([0, 3]),
                    rotation_y=np.array([]),
                    score=np.array([]),
-                    ))
+                )
            else:
                anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)

-            annos[-1]['sample_id'] = np.array(
+            anno['sample_idx'] = np.array(
                [sample_idx] * num_example, dtype=np.int64)
-            det_annos += annos
+            det_annos.append(anno)

        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -537,7 +537,7 @@ class KittiMetric(BaseMetric):
            mmengine.mkdir_or_exist(submission_prefix)
            print(f'Saving KITTI submission to {submission_prefix}')
            for i, anno in enumerate(det_annos):
-                sample_idx = sample_id_list[i]
+                sample_idx = sample_idx_list[i]
                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                with open(cur_det_file, 'w') as f:
                    bbox = anno['bbox']
@@ -560,15 +560,15 @@ class KittiMetric(BaseMetric):

        return det_annos

-    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+    def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
        """Convert the predicted boxes into valid ones.

        Args:
            box_dict (dict): Box dictionaries to be converted.

-                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
-                - scores_3d (torch.Tensor): Scores of boxes.
-                - labels_3d (torch.Tensor): Class labels of boxes.
+                - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (Tensor): Scores of boxes.
+                - labels_3d (Tensor): Class labels of boxes.
            info (dict): Data info.

        Returns:
@@ -654,5 +654,5 @@ class KittiMetric(BaseMetric):
                box3d_camera=np.zeros([0, 7]),
                box3d_lidar=np.zeros([0, 7]),
                scores=np.zeros([0]),
-                label_preds=np.zeros([0, 4]),
+                label_preds=np.zeros([0]),
                sample_idx=sample_idx)
--- a/mmdet3d/evaluation/metrics/nuscenes_metric.py
+++ b/mmdet3d/evaluation/metrics/nuscenes_metric.py
--- a/mmdet3d/evaluation/metrics/waymo_metric.py
+++ b/mmdet3d/evaluation/metrics/waymo_metric.py
 # Copyright (c) OpenMMLab. All rights reserved.
 import tempfile
 from os import path as osp
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Tuple, Union

 import mmengine
 import numpy as np
@@ -26,49 +26,55 @@ class WaymoMetric(KittiMetric):
        waymo_bin_file (str): The path of the annotation file in waymo format.
        data_root (str): Path of dataset root.
            Used for storing waymo evaluation programs.
-        split (str): The split of the evaluation set.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'mAP'.
-        pcd_limit_range (list): The range of point cloud used to
+        split (str): The split of the evaluation set. Defaults to 'training'.
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'mAP'.
+        pcd_limit_range (List[float]): The range of point cloud used to
            filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [-85, -85, -5, 85, 85, 5].
+        convert_kitti_format (bool): Whether to convert the results to
+            kitti format. Now, in order to be compatible with camera-based
+            methods, defaults to True.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
-        convert_kitti_format (bool, optional): Whether convert the reuslts to
-            kitti format. Now, in order to be compatible with camera-based
-            methods, defaults to True.
+        format_only (bool): Format the output results without perform
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
+            If not specified, a temp file will be created. Defaults to None.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
-            Default: None.
-        load_type (str, optional): Type of loading mode during training.
+            Defaults to None.
+        load_type (str): Type of loading mode during training.

            - 'frame_based': Load all of the instances in the frame.
            - 'mv_image_based': Load all of the instances in the frame and need
              to convert to the FOV-based data type to support image-based
              detector.
-            - 'fov_image_base': Only load the instances inside the default cam,
-                and need to convert to the FOV-based data type to support
+            - 'fov_image_based': Only load the instances inside the default
+              cam, and need to convert to the FOV-based data type to support
              image-based detector.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
-        use_pred_sample_idx (bool, optional): In formating results, use the
-            sample index from the prediction or from the load annoataitons.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM_FRONT'.
+        use_pred_sample_idx (bool): In formating results, use the
+            sample index from the prediction or from the load annotations.
            By default, KITTI: True, Waymo: False, Waymo has a conversion
-            process, which needs to use the sample id from load annotation.
+            process, which needs to use the sample idx from load annotation.
+            Defaults to False.
        collect_device (str): Device name used for collecting results
            from different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
-        file_client_args (dict): file client for reading gt in waymo format.
+        file_client_args (dict): File client for reading gt in waymo format.
            Defaults to ``dict(backend='disk')``.
-        idx2metainfo (Optional[str], optional): The file path of the metainfo
-            in waymmo. It stores the mapping from sample_idx to metainfo.
-            The metainfo must contain the keys: 'idx2contextname' and
-            'idx2timestamp'. Defaults to None.
+        idx2metainfo (str, optional): The file path of the metainfo in waymo.
+            It stores the mapping from sample_idx to metainfo. The metainfo
+            must contain the keys: 'idx2contextname' and 'idx2timestamp'.
+            Defaults to None.
    """
    num_cams = 5

@@ -81,14 +87,15 @@ class WaymoMetric(KittiMetric):
                 pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
                 convert_kitti_format: bool = True,
                 prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
-                 submission_prefix: str = None,
+                 format_only: bool = False,
+                 pklfile_prefix: Optional[str] = None,
+                 submission_prefix: Optional[str] = None,
                 load_type: str = 'frame_based',
                 default_cam_key: str = 'CAM_FRONT',
                 use_pred_sample_idx: bool = False,
                 collect_device: str = 'cpu',
                 file_client_args: dict = dict(backend='disk'),
-                 idx2metainfo: Optional[str] = None):
+                 idx2metainfo: Optional[str] = None) -> None:
        self.waymo_bin_file = waymo_bin_file
        self.data_root = data_root
        self.split = split
@@ -101,7 +108,7 @@ class WaymoMetric(KittiMetric):
        else:
            self.idx2metainfo = None

-        super().__init__(
+        super(WaymoMetric, self).__init__(
            ann_file=ann_file,
            metric=metric,
            pcd_limit_range=pcd_limit_range,
@@ -111,13 +118,20 @@ class WaymoMetric(KittiMetric):
            default_cam_key=default_cam_key,
            collect_device=collect_device,
            file_client_args=file_client_args)
+        self.format_only = format_only
+        if self.format_only:
+            assert pklfile_prefix is not None, 'pklfile_prefix must be '
+            'not None when format_only is True, otherwise the result files '
+            'will be saved to a temp directory which will be cleaned up at '
+            'the end.'
+
        self.default_prefix = 'Waymo metric'

-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
-            results (list): The processed results of the whole dataset.
+            results (List[dict]): The processed results of the whole dataset.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
@@ -155,7 +169,7 @@ class WaymoMetric(KittiMetric):
                    if 'image_sweeps' in info:
                        camera_info['image_sweeps'] = info['image_sweeps']

-                    # TODO check if need to modify the sample id
+                    # TODO check if need to modify the sample idx
                    # TODO check when will use it except for evaluation.
                    camera_info['sample_idx'] = info['sample_idx']
                    new_data_infos.append(camera_info)
@@ -175,6 +189,12 @@ class WaymoMetric(KittiMetric):
            classes=self.classes)

        metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.dirname(self.pklfile_prefix)}')
+            return metric_dict
+
        for metric in self.metrics:
            ap_dict = self.waymo_evaluate(
                pklfile_prefix, metric=metric, logger=logger)
@@ -188,19 +208,19 @@ class WaymoMetric(KittiMetric):

    def waymo_evaluate(self,
                       pklfile_prefix: str,
-                       metric: str = None,
-                       logger: MMLogger = None) -> dict:
+                       metric: Optional[str] = None,
+                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
        """Evaluation in Waymo protocol.

        Args:
            pklfile_prefix (str): The location that stored the prediction
                results.
-            metric (str): Metric to be evaluated. Defaults to None.
+            metric (str, optional): Metric to be evaluated. Defaults to None.
            logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
+                related information during evaluation. Defaults to None.

        Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
        """

        import subprocess
@@ -238,8 +258,6 @@ class WaymoMetric(KittiMetric):
            }
            mAP_splits = ret_texts.split('mAP ')
            mAPH_splits = ret_texts.split('mAPH ')
-            mAP_splits = ret_texts.split('mAP ')
-            mAPH_splits = ret_texts.split('mAPH ')
            for idx, key in enumerate(ap_dict.keys()):
                split_idx = int(idx / 2) + 1
                if idx % 2 == 0:  # mAP
@@ -307,26 +325,27 @@ class WaymoMetric(KittiMetric):
                    ap_dict['Cyclist mAPH']) / 3
        return ap_dict

-    def format_results(self,
+    def format_results(
+        self,
        results: List[dict],
-                       pklfile_prefix: str = None,
-                       submission_prefix: str = None,
-                       classes: List[str] = None):
+        pklfile_prefix: Optional[str] = None,
+        submission_prefix: Optional[str] = None,
+        classes: Optional[List[str]] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
        """Format the results to bin file.

        Args:
-            results (list[dict]): Testing results of the
-                dataset.
+            results (List[dict]): Testing results of the dataset.
            pklfile_prefix (str, optional): The prefix of pkl files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
            submission_prefix (str, optional): The prefix of submitted files.
                It includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.

        Returns:
            tuple: (result_dict, tmp_dir), result_dict is a dict containing
@@ -378,15 +397,16 @@ class WaymoMetric(KittiMetric):
        return final_results, waymo_save_tmp_dir

    def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
-                               cam0_info: dict):
+                               cam0_info: dict) -> dict:
        """Merge bounding boxes predicted from multi-view images.
+
        Args:
-            box_dict_per_frame (list[dict]): The results of prediction
+            box_dict_per_frame (List[dict]): The results of prediction
                for each camera.
-            cam2_info (dict): store the sample id for the given frame.
+            cam0_info (dict): Store the sample idx for the given frame.

        Returns:
-            merged_box_dict (dict), store the merge results
+            dict: Merged results.
        """
        box_dict = dict()
        # convert list[dict] to dict[list]
@@ -444,27 +464,28 @@ class WaymoMetric(KittiMetric):
        )
        return merged_box_dict

-    def bbox2result_kitti(self,
-                          net_outputs: list,
-                          sample_id_list: list,
-                          class_names: list,
-                          pklfile_prefix: str = None,
-                          submission_prefix: str = None):
+    def bbox2result_kitti(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
        """Convert 3D detection results to kitti format for evaluation and test
        submission.

        Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
            pklfile_prefix (str, optional): The prefix of pkl file.
                Defaults to None.
            submission_prefix (str, optional): The prefix of submission file.
                Defaults to None.

        Returns:
-            list[dict]: A list of dictionaries with the kitti format.
+            List[dict]: A list of dictionaries with the kitti format.
        """
        if submission_prefix is not None:
            mmengine.mkdir_or_exist(submission_prefix)
@@ -473,8 +494,7 @@ class WaymoMetric(KittiMetric):
        print('\nConverting prediction to KITTI format')
        for idx, pred_dicts in enumerate(
                mmengine.track_iter_progress(net_outputs)):
-            annos = []
-            sample_idx = sample_id_list[idx]
+            sample_idx = sample_idx_list[idx]
            info = self.data_infos[sample_idx]

            if self.load_type == 'mv_image_based':
@@ -536,7 +556,6 @@ class WaymoMetric(KittiMetric):
                    anno['score'].append(score)

                anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
            else:
                anno = {
                    'name': np.array([]),
@@ -549,7 +568,6 @@ class WaymoMetric(KittiMetric):
                    'rotation_y': np.array([]),
                    'score': np.array([]),
                }
-                annos.append(anno)

            if submission_prefix is not None:
                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
@@ -577,10 +595,10 @@ class WaymoMetric(KittiMetric):
                # In waymo validation sample_idx in prediction is 000xxx
                # but in info file it is 1000xxx
                save_sample_idx = box_dict['sample_idx']
-            annos[-1]['sample_idx'] = np.array(
-                [save_sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+            anno['sample_idx'] = np.array(
+                [save_sample_idx] * len(anno['score']), dtype=np.int64)

-            det_annos += annos
+            det_annos.append(anno)

        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -592,16 +610,16 @@ class WaymoMetric(KittiMetric):

        return det_annos

-    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+    def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
        """Convert the predicted boxes into valid ones. Should handle the
        load_type (frame_based, mv_image_based, fov_image_based) separately.

        Args:
            box_dict (dict): Box dictionaries to be converted.

-                - bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
-                - scores_3d (torch.Tensor): Scores of boxes.
-                - labels_3d (torch.Tensor): Class labels of boxes.
+                - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (Tensor): Scores of boxes.
+                - labels_3d (Tensor): Class labels of boxes.
            info (dict): Data info.

        Returns:
@@ -673,7 +691,7 @@ class WaymoMetric(KittiMetric):
            valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
                              (box_preds_lidar.center < limit_range[3:]))
            valid_inds = valid_pcd_inds.all(-1)
-        if self.load_type in ['mv_image_based', 'fov_image_based']:
+        elif self.load_type in ['mv_image_based', 'fov_image_based']:
            valid_inds = valid_cam_inds

        if valid_inds.sum() > 0: