Unverified commit 8bf2f5a4, authored by Xiang Xu, committed by GitHub

[Enhance]: Add format_only option for nuScenes and Waymo evaluation (#2151)

* Support submitting test-set results on the nuScenes and Waymo datasets

* Fix typo
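
As a rough usage sketch (not taken from this commit), the new option is meant to be switched on in the test evaluator config together with a result-file prefix; the dataset paths and output prefixes below are illustrative placeholders only:

# Hypothetical nuScenes test-set submission config (paths/prefixes are examples).
test_evaluator = dict(
    type='NuScenesMetric',
    data_root='data/nuscenes',
    ann_file='data/nuscenes/nuscenes_infos_test.pkl',
    metric='bbox',
    format_only=True,  # only format and dump the results, skip evaluation
    jsonfile_prefix='results/nuscenes_test')  # required when format_only=True

# Hypothetical Waymo test-set submission config (paths/prefixes are examples).
test_evaluator = dict(
    type='WaymoMetric',
    ann_file='data/waymo/kitti_format/waymo_infos_test.pkl',
    waymo_bin_file='data/waymo/waymo_format/gt.bin',
    data_root='data/waymo/waymo_format',
    metric='mAP',
    format_only=True,  # only format and dump the results, skip evaluation
    pklfile_prefix='results/waymo_test')  # required when format_only=True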
parent ed081770
@@ -80,8 +80,7 @@ test_dataloader = val_dataloader
val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
-    metric='bbox',
-    pred_box_type_3d='Camera')
+    metric='bbox')
test_evaluator = val_evaluator
...
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
from os import path as osp
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union
import mmengine
import numpy as np
@@ -22,44 +22,49 @@ class KittiMetric(BaseMetric):
    Args:
        ann_file (str): Annotation file path.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
-        pcd_limit_range (list): The range of point cloud used to
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'bbox'.
+        pcd_limit_range (List[float]): The range of point cloud used to
            filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [0, -40, -3, 70.4, 40, 0.0].
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
+            If not specified, a temp file will be created. Defaults to None.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM2'
        format_only (bool): Format the output results without perform
            evaluation. It is useful when you want to format the result
            to a specific format and submit it to the test server.
            Defaults to False.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
-            Default: None.
+            Defaults to None.
        collect_device (str): Device name used for collecting results
            from different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
    """
-    def __init__(self,
-                 ann_file: str,
-                 metric: Union[str, List[str]] = 'bbox',
-                 pred_box_type_3d: str = 'LiDAR',
-                 pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
-                 prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
-                 default_cam_key: str = 'CAM2',
-                 format_only: bool = False,
-                 submission_prefix: str = None,
-                 collect_device: str = 'cpu',
-                 file_client_args: dict = dict(backend='disk')):
+    def __init__(
+        self,
+        ann_file: str,
+        metric: Union[str, List[str]] = 'bbox',
+        pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
+        prefix: Optional[str] = None,
+        pklfile_prefix: Optional[str] = None,
+        default_cam_key: str = 'CAM2',
+        format_only: bool = False,
+        submission_prefix: Optional[str] = None,
+        collect_device: str = 'cpu',
+        file_client_args: dict = dict(backend='disk')
+    ) -> None:
        self.default_prefix = 'Kitti metric'
        super(KittiMetric, self).__init__(
            collect_device=collect_device, prefix=prefix)
@@ -68,25 +73,23 @@ class KittiMetric(BaseMetric):
        self.pklfile_prefix = pklfile_prefix
        self.format_only = format_only
        if self.format_only:
-            assert submission_prefix is not None, 'submission_prefix must be'
-            'not None when format_only is True, otherwise the result files'
-            'will be saved to a temp directory which will be cleaned up at'
+            assert submission_prefix is not None, 'submission_prefix must be '
+            'not None when format_only is True, otherwise the result files '
+            'will be saved to a temp directory which will be cleaned up at '
            'the end.'

        self.submission_prefix = submission_prefix
-        self.pred_box_type_3d = pred_box_type_3d
        self.default_cam_key = default_cam_key
        self.file_client_args = file_client_args
-        self.default_cam_key = default_cam_key

        allowed_metrics = ['bbox', 'img_bbox', 'mAP', 'LET_mAP']
        self.metrics = metric if isinstance(metric, list) else [metric]
        for metric in self.metrics:
            if metric not in allowed_metrics:
                raise KeyError("metric should be one of 'bbox', 'img_bbox', "
-                               'but got {metric}.')
+                               f'but got {metric}.')

-    def convert_annos_to_kitti_annos(self, data_infos: dict) -> list:
+    def convert_annos_to_kitti_annos(self, data_infos: dict) -> List[dict]:
        """Convert loading annotations to Kitti annotations.

        Args:
@@ -169,13 +172,13 @@ class KittiMetric(BaseMetric):
        result['pred_instances'] = pred_2d
        sample_idx = data_sample['sample_idx']
        result['sample_idx'] = sample_idx

        self.results.append(result)

-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
-            results (list): The processed results of the whole dataset.
+            results (List[dict]): The processed results of the whole dataset.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
@@ -220,25 +223,25 @@ class KittiMetric(BaseMetric):
        return metric_dict

    def kitti_evaluate(self,
-                       results_dict: List[dict],
+                       results_dict: dict,
                       gt_annos: List[dict],
-                       metric: str = None,
-                       classes: List[str] = None,
-                       logger: MMLogger = None) -> dict:
+                       metric: Optional[str] = None,
+                       classes: Optional[List[str]] = None,
+                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
        """Evaluation in KITTI protocol.

        Args:
            results_dict (dict): Formatted results of the dataset.
-            gt_annos (list[dict]): Contain gt information of each sample.
+            gt_annos (List[dict]): Contain gt information of each sample.
            metric (str, optional): Metrics to be evaluated.
-                Default: None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                related information during evaluation. Defaults to None.

        Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
        """
        ap_dict = dict()
        for name in results_dict:
@@ -249,37 +252,38 @@ class KittiMetric(BaseMetric):
            ap_result_str, ap_dict_ = kitti_eval(
                gt_annos, results_dict[name], classes, eval_types=eval_types)
            for ap_type, ap in ap_dict_.items():
-                ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+                ap_dict[f'{name}/{ap_type}'] = float(f'{ap:.4f}')

            print_log(f'Results of {name}:\n' + ap_result_str, logger=logger)

        return ap_dict

-    def format_results(self,
-                       results: List[dict],
-                       pklfile_prefix: str = None,
-                       submission_prefix: str = None,
-                       classes: List[str] = None):
+    def format_results(
+        self,
+        results: List[dict],
+        pklfile_prefix: Optional[str] = None,
+        submission_prefix: Optional[str] = None,
+        classes: Optional[List[str]] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
        """Format the results to pkl file.

        Args:
-            results (list[dict]): Testing results of the
-                dataset.
+            results (List[dict]): Testing results of the dataset.
            pklfile_prefix (str, optional): The prefix of pkl files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
            submission_prefix (str, optional): The prefix of submitted files.
                It includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.

        Returns:
            tuple: (result_dict, tmp_dir), result_dict is a dict containing
                the formatted result, tmp_dir is the temporal directory created
                for saving json files when jsonfile_prefix is not specified.
        """
        if pklfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
@@ -287,7 +291,7 @@ class KittiMetric(BaseMetric):
        else:
            tmp_dir = None

        result_dict = dict()
-        sample_id_list = [result['sample_idx'] for result in results]
+        sample_idx_list = [result['sample_idx'] for result in results]
        for name in results[0]:
            if submission_prefix is not None:
                submission_prefix_ = osp.join(submission_prefix, name)
@@ -301,7 +305,7 @@ class KittiMetric(BaseMetric):
                    0] != '_' and results[0][name]:
                net_outputs = [result[name] for result in results]
                result_list_ = self.bbox2result_kitti(net_outputs,
-                                                      sample_id_list, classes,
+                                                      sample_idx_list, classes,
                                                      pklfile_prefix_,
                                                      submission_prefix_)
                result_dict[name] = result_list_
@@ -309,32 +313,33 @@ class KittiMetric(BaseMetric):
                    name]:
                net_outputs = [result[name] for result in results]
                result_list_ = self.bbox2result_kitti2d(
-                    net_outputs, sample_id_list, classes, pklfile_prefix_,
+                    net_outputs, sample_idx_list, classes, pklfile_prefix_,
                    submission_prefix_)
                result_dict[name] = result_list_
        return result_dict, tmp_dir
-    def bbox2result_kitti(self,
-                          net_outputs: list,
-                          sample_id_list: list,
-                          class_names: list,
-                          pklfile_prefix: str = None,
-                          submission_prefix: str = None):
+    def bbox2result_kitti(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
        """Convert 3D detection results to kitti format for evaluation and test
        submission.

        Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
            pklfile_prefix (str, optional): The prefix of pkl file.
                Defaults to None.
            submission_prefix (str, optional): The prefix of submission file.
                Defaults to None.

        Returns:
-            list[dict]: A list of dictionaries with the kitti format.
+            List[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
@@ -345,8 +350,7 @@ class KittiMetric(BaseMetric):
        print('\nConverting 3D prediction to KITTI format')
        for idx, pred_dicts in enumerate(
                mmengine.track_iter_progress(net_outputs)):
-            annos = []
-            sample_idx = sample_id_list[idx]
+            sample_idx = sample_idx_list[idx]
            info = self.data_infos[sample_idx]
            # Here default used 'CAM2' to compute metric. If you want to
            # use another camera, please modify it.
@@ -393,7 +397,6 @@ class KittiMetric(BaseMetric):
                    anno['score'].append(score)

                anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
            else:
                anno = {
                    'name': np.array([]),
@@ -406,7 +409,6 @@ class KittiMetric(BaseMetric):
                    'rotation_y': np.array([]),
                    'score': np.array([]),
                }
-                annos.append(anno)

            if submission_prefix is not None:
                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
@@ -428,10 +430,10 @@ class KittiMetric(BaseMetric):
                            anno['score'][idx]),
                        file=f)

-            annos[-1]['sample_id'] = np.array(
-                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
-            det_annos += annos
+            anno['sample_idx'] = np.array(
+                [sample_idx] * len(anno['score']), dtype=np.int64)
+            det_annos.append(anno)

        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -443,27 +445,28 @@ class KittiMetric(BaseMetric):
        return det_annos

-    def bbox2result_kitti2d(self,
-                            net_outputs: list,
-                            sample_id_list,
-                            class_names: list,
-                            pklfile_prefix: str = None,
-                            submission_prefix: str = None):
+    def bbox2result_kitti2d(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
        """Convert 2D detection results to kitti format for evaluation and test
        submission.

        Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
            pklfile_prefix (str, optional): The prefix of pkl file.
                Defaults to None.
            submission_prefix (str, optional): The prefix of submission file.
                Defaults to None.

        Returns:
-            list[dict]: A list of dictionaries have the kitti format
+            List[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
@@ -471,7 +474,6 @@ class KittiMetric(BaseMetric):
        print('\nConverting 2D prediction to KITTI format')
        for i, bboxes_per_sample in enumerate(
                mmengine.track_iter_progress(net_outputs)):
-            annos = []
            anno = dict(
                name=[],
                truncated=[],
@@ -482,7 +484,7 @@ class KittiMetric(BaseMetric):
                location=[],
                rotation_y=[],
                score=[])
-            sample_idx = sample_id_list[i]
+            sample_idx = sample_idx_list[i]
            num_example = 0

            bbox = bboxes_per_sample['bboxes']
@@ -504,25 +506,23 @@ class KittiMetric(BaseMetric):
                num_example += 1

            if num_example == 0:
-                annos.append(
-                    dict(
-                        name=np.array([]),
-                        truncated=np.array([]),
-                        occluded=np.array([]),
-                        alpha=np.array([]),
-                        bbox=np.zeros([0, 4]),
-                        dimensions=np.zeros([0, 3]),
-                        location=np.zeros([0, 3]),
-                        rotation_y=np.array([]),
-                        score=np.array([]),
-                    ))
+                anno = dict(
+                    name=np.array([]),
+                    truncated=np.array([]),
+                    occluded=np.array([]),
+                    alpha=np.array([]),
+                    bbox=np.zeros([0, 4]),
+                    dimensions=np.zeros([0, 3]),
+                    location=np.zeros([0, 3]),
+                    rotation_y=np.array([]),
+                    score=np.array([]),
+                )
            else:
                anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)

-            annos[-1]['sample_id'] = np.array(
-                [sample_idx] * num_example, dtype=np.int64)
-            det_annos += annos
+            anno['sample_idx'] = np.array(
+                [sample_idx] * num_example, dtype=np.int64)
+            det_annos.append(anno)

        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -537,7 +537,7 @@ class KittiMetric(BaseMetric):
            mmengine.mkdir_or_exist(submission_prefix)
            print(f'Saving KITTI submission to {submission_prefix}')
            for i, anno in enumerate(det_annos):
-                sample_idx = sample_id_list[i]
+                sample_idx = sample_idx_list[i]
                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                with open(cur_det_file, 'w') as f:
                    bbox = anno['bbox']
@@ -560,15 +560,15 @@ class KittiMetric(BaseMetric):
        return det_annos
-    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+    def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
        """Convert the predicted boxes into valid ones.

        Args:
            box_dict (dict): Box dictionaries to be converted.

-                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
-                - scores_3d (torch.Tensor): Scores of boxes.
-                - labels_3d (torch.Tensor): Class labels of boxes.
+                - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (Tensor): Scores of boxes.
+                - labels_3d (Tensor): Class labels of boxes.
            info (dict): Data info.

        Returns:
@@ -576,9 +576,9 @@ class KittiMetric(BaseMetric):
            - bbox (np.ndarray): 2D bounding boxes.
            - box3d_camera (np.ndarray): 3D bounding boxes in
              camera coordinate.
            - box3d_lidar (np.ndarray): 3D bounding boxes in
              LiDAR coordinate.
            - scores (np.ndarray): Scores of boxes.
            - label_preds (np.ndarray): Class label predictions.
            - sample_idx (int): Sample index.
@@ -654,5 +654,5 @@ class KittiMetric(BaseMetric):
                box3d_camera=np.zeros([0, 7]),
                box3d_lidar=np.zeros([0, 7]),
                scores=np.zeros([0]),
-                label_preds=np.zeros([0, 4]),
+                label_preds=np.zeros([0]),
                sample_idx=sample_idx)
# Copyright (c) OpenMMLab. All rights reserved.
-import logging
import tempfile
from os import path as osp
from typing import Dict, List, Optional, Sequence, Tuple, Union
@@ -28,22 +27,29 @@ class NuScenesMetric(BaseMetric):
    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'bbox'.
        modality (dict): Modality to specify the sensor data used
            as input. Defaults to dict(use_camera=False, use_lidar=True).
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
+        format_only (bool): Format the output results without perform
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
        jsonfile_prefix (str, optional): The prefix of json files including
            the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
+            If not specified, a temp file will be created. Defaults to None.
        eval_version (str): Configuration version of evaluation.
            Defaults to 'detection_cvpr_2019'.
        collect_device (str): Device name used for collecting results
            from different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
    """

    NameMapping = {
        'movable_object.barrier': 'barrier',
@@ -87,8 +93,9 @@ class NuScenesMetric(BaseMetric):
                 data_root: str,
                 ann_file: str,
                 metric: Union[str, List[str]] = 'bbox',
-                 modality: Dict = dict(use_camera=False, use_lidar=True),
+                 modality: dict = dict(use_camera=False, use_lidar=True),
                 prefix: Optional[str] = None,
+                 format_only: bool = False,
                 jsonfile_prefix: Optional[str] = None,
                 eval_version: str = 'detection_cvpr_2019',
                 collect_device: str = 'cpu',
@@ -105,6 +112,13 @@ class NuScenesMetric(BaseMetric):
        self.ann_file = ann_file
        self.data_root = data_root
        self.modality = modality
+        self.format_only = format_only
+        if self.format_only:
+            assert jsonfile_prefix is not None, 'jsonfile_prefix must be '
+            'not None when format_only is True, otherwise the result files '
+            'will be saved to a temp directory which will be cleanup at '
+            'the end.'
+
        self.jsonfile_prefix = jsonfile_prefix
        self.file_client_args = file_client_args
@@ -137,13 +151,13 @@ class NuScenesMetric(BaseMetric):
        result['pred_instances'] = pred_2d
        sample_idx = data_sample['sample_idx']
        result['sample_idx'] = sample_idx

        self.results.append(result)

-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
-            results (list): The processed results of each batch.
+            results (List[dict]): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
@@ -160,6 +174,12 @@ class NuScenesMetric(BaseMetric):
            self.jsonfile_prefix)

        metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.basename(self.jsonfile_prefix)}')
+            return metric_dict
+
        for metric in self.metrics:
            ap_dict = self.nus_evaluate(
                result_dict, classes=classes, metric=metric, logger=logger)
@@ -173,21 +193,20 @@ class NuScenesMetric(BaseMetric):
    def nus_evaluate(self,
                     result_dict: dict,
                     metric: str = 'bbox',
-                     classes: List[str] = None,
-                     logger: logging.Logger = None) -> dict:
+                     classes: Optional[List[str]] = None,
+                     logger: Optional[MMLogger] = None) -> Dict[str, float]:
        """Evaluation in Nuscenes protocol.

        Args:
            result_dict (dict): Formatted results of the dataset.
-            metric (str): Metrics to be evaluated.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            metric (str): Metrics to be evaluated. Defaults to 'bbox'.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
+                related information during evaluation. Defaults to None.

        Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
        """
        metric_dict = dict()
        for name in result_dict:
@@ -197,22 +216,22 @@ class NuScenesMetric(BaseMetric):
            metric_dict.update(ret_dict)
        return metric_dict

-    def _evaluate_single(self,
-                         result_path: str,
-                         classes: List[None] = None,
-                         result_name: str = 'pred_instances_3d') -> dict:
+    def _evaluate_single(
+            self,
+            result_path: str,
+            classes: Optional[List[str]] = None,
+            result_name: str = 'pred_instances_3d') -> Dict[str, float]:
        """Evaluation for a single model in nuScenes protocol.

        Args:
            result_path (str): Path of the result file.
-                Default: 'bbox'.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            result_name (str): Result name in the metric prefix.
-                Default: 'pred_instances_3d'.
+                Defaults to 'pred_instances_3d'.

        Returns:
-            dict: Dictionary of evaluation details.
+            Dict[str, float]: Dictionary of evaluation details.
        """
        from nuscenes import NuScenes
        from nuscenes.eval.detection.evaluate import NuScenesEval
@@ -239,39 +258,41 @@ class NuScenesMetric(BaseMetric):
        metric_prefix = f'{result_name}_NuScenes'
        for name in classes:
            for k, v in metrics['label_aps'][name].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                detail[f'{metric_prefix}/{name}_AP_dist_{k}'] = val
            for k, v in metrics['label_tp_errors'][name].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                detail[f'{metric_prefix}/{name}_{k}'] = val
            for k, v in metrics['tp_errors'].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                detail[f'{metric_prefix}/{self.ErrNameMapping[k]}'] = val

        detail[f'{metric_prefix}/NDS'] = metrics['nd_score']
        detail[f'{metric_prefix}/mAP'] = metrics['mean_ap']
        return detail

-    def format_results(self,
-                       results: List[dict],
-                       classes: List[str] = None,
-                       jsonfile_prefix: str = None) -> Tuple:
+    def format_results(
+        self,
+        results: List[dict],
+        classes: Optional[List[str]] = None,
+        jsonfile_prefix: Optional[str] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
        """Format the mmdet3d results to standard NuScenes json file.

        Args:
-            results (list[dict]): Testing results of the dataset.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            results (List[dict]): Testing results of the dataset.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            jsonfile_prefix (str, optional): The prefix of json files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.

        Returns:
            tuple: Returns (result_dict, tmp_dir), where `result_dict` is a
                dict containing the json filepaths, `tmp_dir` is the temporal
                directory created for saving json files when
                `jsonfile_prefix` is not specified.
        """
        assert isinstance(results, list), 'results must be a list'
@@ -281,7 +302,7 @@ class NuScenesMetric(BaseMetric):
        else:
            tmp_dir = None

        result_dict = dict()
-        sample_id_list = [result['sample_idx'] for result in results]
+        sample_idx_list = [result['sample_idx'] for result in results]

        for name in results[0]:
            if 'pred' in name and '3d' in name and name[0] != '_':
@@ -291,14 +312,14 @@ class NuScenesMetric(BaseMetric):
                box_type_3d = type(results_[0]['bboxes_3d'])
                if box_type_3d == LiDARInstance3DBoxes:
                    result_dict[name] = self._format_lidar_bbox(
-                        results_, sample_id_list, classes, tmp_file_)
+                        results_, sample_idx_list, classes, tmp_file_)
                elif box_type_3d == CameraInstance3DBoxes:
                    result_dict[name] = self._format_camera_bbox(
-                        results_, sample_id_list, classes, tmp_file_)
+                        results_, sample_idx_list, classes, tmp_file_)

        return result_dict, tmp_dir

-    def get_attr_name(self, attr_idx, label_name):
+    def get_attr_name(self, attr_idx: int, label_name: str) -> str:
        """Get attribute from predicted index.

        This is a workaround to predict attribute when the predicted velocity
@@ -347,16 +368,19 @@ class NuScenesMetric(BaseMetric):
    def _format_camera_bbox(self,
                            results: List[dict],
-                            sample_id_list: List[int],
-                            classes: List[str] = None,
-                            jsonfile_prefix: str = None) -> str:
+                            sample_idx_list: List[int],
+                            classes: Optional[List[str]] = None,
+                            jsonfile_prefix: Optional[str] = None) -> str:
        """Convert the results to the standard format.

        Args:
-            results (list[dict]): Testing results of the dataset.
-            jsonfile_prefix (str): The prefix of the output jsonfile.
-                You can specify the output directory/filename by
-                modifying the jsonfile_prefix. Default: None.
+            results (List[dict]): Testing results of the dataset.
+            sample_idx_list (List[int]): List of result sample idx.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
+            jsonfile_prefix (str, optional): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Defaults to None.

        Returns:
            str: Path of the output json file.
@@ -379,10 +403,10 @@ class NuScenesMetric(BaseMetric):
        for i, det in enumerate(mmengine.track_iter_progress(results)):

-            sample_id = sample_id_list[i]
-            frame_sample_id = sample_id // CAM_NUM
-            camera_type_id = sample_id % CAM_NUM
+            sample_idx = sample_idx_list[i]
+            frame_sample_idx = sample_idx // CAM_NUM
+            camera_type_id = sample_idx % CAM_NUM

            if camera_type_id == 0:
                boxes_per_frame = []
@@ -391,17 +415,17 @@ class NuScenesMetric(BaseMetric):
            # need to merge results from images of the same sample
            annos = []
            boxes, attrs = output_to_nusc_box(det)
-            sample_token = self.data_infos[frame_sample_id]['token']
+            sample_token = self.data_infos[frame_sample_idx]['token']
            camera_type = camera_types[camera_type_id]
            boxes, attrs = cam_nusc_box_to_global(
-                self.data_infos[frame_sample_id], boxes, attrs, classes,
+                self.data_infos[frame_sample_idx], boxes, attrs, classes,
                self.eval_detection_configs, camera_type)
            boxes_per_frame.extend(boxes)
            attrs_per_frame.extend(attrs)
            # Remove redundant predictions caused by overlap of images
-            if (sample_id + 1) % CAM_NUM != 0:
+            if (sample_idx + 1) % CAM_NUM != 0:
                continue
-            boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_id],
+            boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_idx],
                                           boxes_per_frame, classes,
                                           self.eval_detection_configs)
            cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
@@ -432,7 +456,7 @@ class NuScenesMetric(BaseMetric):
            det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
            boxes, attrs = output_to_nusc_box(det)
            boxes, attrs = cam_nusc_box_to_global(
-                self.data_infos[frame_sample_id], boxes, attrs, classes,
+                self.data_infos[frame_sample_idx], boxes, attrs, classes,
                self.eval_detection_configs)

            for i, box in enumerate(boxes):
@@ -461,25 +485,25 @@ class NuScenesMetric(BaseMetric):
        mmengine.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
-        print('Results writes to', res_path)
+        print(f'Results writes to {res_path}')
        mmengine.dump(nusc_submissions, res_path)
        return res_path

    def _format_lidar_bbox(self,
                           results: List[dict],
-                           sample_id_list: List[int],
-                           classes: List[str] = None,
-                           jsonfile_prefix: str = None) -> str:
+                           sample_idx_list: List[int],
+                           classes: Optional[List[str]] = None,
+                           jsonfile_prefix: Optional[str] = None) -> str:
        """Convert the results to the standard format.

        Args:
-            results (list[dict]): Testing results of the dataset.
-            sample_id_list (list[int]): List of result sample id.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            results (List[dict]): Testing results of the dataset.
+            sample_idx_list (List[int]): List of result sample idx.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
            jsonfile_prefix (str, optional): The prefix of the output jsonfile.
                You can specify the output directory/filename by
-                modifying the jsonfile_prefix. Default: None.
+                modifying the jsonfile_prefix. Defaults to None.

        Returns:
            str: Path of the output json file.
@@ -490,10 +514,10 @@ class NuScenesMetric(BaseMetric):
        for i, det in enumerate(mmengine.track_iter_progress(results)):
            annos = []
            boxes, attrs = output_to_nusc_box(det)
-            sample_id = sample_id_list[i]
-            sample_token = self.data_infos[sample_id]['token']
-            boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
-                                             classes,
+            sample_idx = sample_idx_list[i]
+            sample_token = self.data_infos[sample_idx]['token']
+            boxes = lidar_nusc_box_to_global(self.data_infos[sample_idx],
+                                             boxes, classes,
                                             self.eval_detection_configs)
            for i, box in enumerate(boxes):
                name = classes[box.label]
@@ -535,12 +559,13 @@ class NuScenesMetric(BaseMetric):
        }
        mmengine.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
-        print('Results writes to', res_path)
+        print(f'Results writes to {res_path}')
        mmengine.dump(nusc_submissions, res_path)
        return res_path
-def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
+def output_to_nusc_box(
+        detection: dict) -> Tuple[List[NuScenesBox], Union[np.ndarray, None]]:
    """Convert the output to the box class in the nuScenes.

    Args:
@@ -551,7 +576,8 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
            - labels_3d (torch.Tensor): Predicted box labels.

    Returns:
-        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
+        Tuple[List[:obj:`NuScenesBox`], np.ndarray or None]:
+            List of standard NuScenesBoxes and attribute labels.
    """
    bbox3d = detection['bboxes_3d']
    scores = detection['scores_3d'].numpy()
@@ -566,7 +592,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:

    box_list = []

-    if type(bbox3d) == LiDARInstance3DBoxes:
+    if isinstance(bbox3d, LiDARInstance3DBoxes):
        # our LiDAR coordinate system -> nuScenes box coordinate system
        nus_box_dims = box_dims[:, [1, 0, 2]]
        for i in range(len(bbox3d)):
@@ -584,7 +610,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
                score=scores[i],
                velocity=velocity)
            box_list.append(box)
-    elif type(bbox3d) == CameraInstance3DBoxes:
+    elif isinstance(bbox3d, CameraInstance3DBoxes):
        # our Camera coordinate system -> nuScenes box coordinate system
        # convert the dim/rot to nuscbox convention
        nus_box_dims = box_dims[:, [2, 0, 1]]
@@ -605,7 +631,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
            box_list.append(box)
    else:
        raise NotImplementedError(
-            f'Do not support convert {type(bbox3d)} bboxes'
+            f'Do not support convert {type(bbox3d)} bboxes '
            'to standard NuScenesBoxes.')

    return box_list, attrs
@@ -619,13 +645,13 @@ def lidar_nusc_box_to_global(
    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.

    Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        List[:obj:`DetectionConfig`]: List of standard NuScenesBoxes in the
+            global coordinate.
    """
    box_list = []
    for box in boxes:
@@ -652,25 +678,26 @@ def lidar_nusc_box_to_global(
def cam_nusc_box_to_global(
    info: dict,
    boxes: List[NuScenesBox],
-    attrs: List[str],
+    attrs: np.ndarray,
    classes: List[str],
    eval_configs: DetectionConfig,
    camera_type: str = 'CAM_FRONT',
-) -> List[NuScenesBox]:
+) -> Tuple[List[NuScenesBox], List[int]]:
    """Convert the box from camera to global coordinate.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        attrs (list[str]): List of attributes.
-        camera_type (str): Type of camera.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        attrs (np.ndarray): Predicted attributes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.
+        camera_type (str): Type of camera. Defaults to 'CAM_FRONT'.

    Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        Tuple[List[:obj:`NuScenesBox`], List[int]]:
+            List of standard NuScenesBoxes in the global coordinate and
+            attribute label.
    """
    box_list = []
    attr_list = []
@@ -704,13 +731,13 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.

    Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        List[:obj:`NuScenesBox`]: List of standard NuScenesBoxes in
+            camera coordinate.
    """
    box_list = []
    for box in boxes:
@@ -736,15 +763,17 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],

    return box_list

-def nusc_box_to_cam_box3d(boxes: List[NuScenesBox]):
+def nusc_box_to_cam_box3d(
+    boxes: List[NuScenesBox]
+) -> Tuple[CameraInstance3DBoxes, torch.Tensor, torch.Tensor]:
    """Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.

    Args:
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        boxes (:obj:`List[NuScenesBox]`): List of predicted NuScenesBoxes.

    Returns:
-        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
+        Tuple[:obj:`CameraInstance3DBoxes`, torch.Tensor, torch.Tensor]:
            Converted 3D bounding boxes, scores and labels.
    """
    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
    dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
...
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
from os import path as osp
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Tuple, Union
import mmengine
import numpy as np
@@ -25,50 +25,56 @@ class WaymoMetric(KittiMetric):
        ann_file (str): The path of the annotation file in kitti format.
        waymo_bin_file (str): The path of the annotation file in waymo format.
        data_root (str): Path of dataset root.
            Used for storing waymo evaluation programs.
-        split (str): The split of the evaluation set.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'mAP'.
-        pcd_limit_range (list): The range of point cloud used to
+        split (str): The split of the evaluation set. Defaults to 'training'.
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'mAP'.
+        pcd_limit_range (List[float]): The range of point cloud used to
            filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [-85, -85, -5, 85, 85, 5].
+        convert_kitti_format (bool): Whether to convert the results to
+            kitti format. Now, in order to be compatible with camera-based
+            methods, defaults to True.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
-        convert_kitti_format (bool, optional): Whether convert the reuslts to
-            kitti format. Now, in order to be compatible with camera-based
-            methods, defaults to True.
+        format_only (bool): Format the output results without perform
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
+            If not specified, a temp file will be created. Defaults to None.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
-            Default: None.
-        load_type (str, optional): Type of loading mode during training.
+            Defaults to None.
+        load_type (str): Type of loading mode during training.

            - 'frame_based': Load all of the instances in the frame.
            - 'mv_image_based': Load all of the instances in the frame and need
              to convert to the FOV-based data type to support image-based
              detector.
-            - 'fov_image_base': Only load the instances inside the default cam,
-              and need to convert to the FOV-based data type to support
+            - 'fov_image_based': Only load the instances inside the default
+              cam, and need to convert to the FOV-based data type to support
              image-based detector.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
-        use_pred_sample_idx (bool, optional): In formating results, use the
-            sample index from the prediction or from the load annoataitons.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM_FRONT'.
+        use_pred_sample_idx (bool): In formating results, use the
+            sample index from the prediction or from the load annotations.
            By default, KITTI: True, Waymo: False, Waymo has a conversion
-            process, which needs to use the sample id from load annotation.
+            process, which needs to use the sample idx from load annotation.
+            Defaults to False.
        collect_device (str): Device name used for collecting results
            from different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
-        file_client_args (dict): file client for reading gt in waymo format.
+        file_client_args (dict): File client for reading gt in waymo format.
            Defaults to ``dict(backend='disk')``.
-        idx2metainfo (Optional[str], optional): The file path of the metainfo
-            in waymmo. It stores the mapping from sample_idx to metainfo.
-            The metainfo must contain the keys: 'idx2contextname' and
-            'idx2timestamp'. Defaults to None.
+        idx2metainfo (str, optional): The file path of the metainfo in waymo.
+            It stores the mapping from sample_idx to metainfo. The metainfo
+            must contain the keys: 'idx2contextname' and 'idx2timestamp'.
+            Defaults to None.
    """

    num_cams = 5
@@ -81,14 +87,15 @@ class WaymoMetric(KittiMetric):
                 pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
                 convert_kitti_format: bool = True,
                 prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
-                 submission_prefix: str = None,
+                 format_only: bool = False,
+                 pklfile_prefix: Optional[str] = None,
+                 submission_prefix: Optional[str] = None,
                 load_type: str = 'frame_based',
                 default_cam_key: str = 'CAM_FRONT',
                 use_pred_sample_idx: bool = False,
                 collect_device: str = 'cpu',
                 file_client_args: dict = dict(backend='disk'),
-                 idx2metainfo: Optional[str] = None):
+                 idx2metainfo: Optional[str] = None) -> None:
        self.waymo_bin_file = waymo_bin_file
        self.data_root = data_root
        self.split = split
...@@ -101,7 +108,7 @@ class WaymoMetric(KittiMetric): ...@@ -101,7 +108,7 @@ class WaymoMetric(KittiMetric):
else: else:
self.idx2metainfo = None self.idx2metainfo = None
super().__init__( super(WaymoMetric, self).__init__(
ann_file=ann_file, ann_file=ann_file,
metric=metric, metric=metric,
pcd_limit_range=pcd_limit_range, pcd_limit_range=pcd_limit_range,
...@@ -111,13 +118,20 @@ class WaymoMetric(KittiMetric): ...@@ -111,13 +118,20 @@ class WaymoMetric(KittiMetric):
default_cam_key=default_cam_key, default_cam_key=default_cam_key,
collect_device=collect_device, collect_device=collect_device,
file_client_args=file_client_args) file_client_args=file_client_args)
self.format_only = format_only
if self.format_only:
assert pklfile_prefix is not None, (
'pklfile_prefix must be not None when format_only is True, '
'otherwise the result files will be saved to a temp directory '
'which will be cleaned up at the end.')
self.default_prefix = 'Waymo metric' self.default_prefix = 'Waymo metric'
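To show how the new format_only option is intended to be used, here is a minimal, hedged evaluator config sketch; the dataset paths and result prefixes are illustrative placeholders rather than values taken from this commit.
test_evaluator = dict(
    type='WaymoMetric',
    ann_file='data/waymo/kitti_format/waymo_infos_test.pkl',  # placeholder path
    waymo_bin_file='data/waymo/waymo_format/gt.bin',          # placeholder path
    data_root='data/waymo/waymo_format',                      # placeholder path
    format_only=True,  # only dump result files for submission, skip evaluation
    pklfile_prefix='results/waymo/kitti_results_test')  # required when format_only=True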
def compute_metrics(self, results: list) -> Dict[str, float]: def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
"""Compute the metrics from processed results. """Compute the metrics from processed results.
Args: Args:
results (list): The processed results of the whole dataset. results (List[dict]): The processed results of the whole dataset.
Returns: Returns:
Dict[str, float]: The computed metrics. The keys are the names of Dict[str, float]: The computed metrics. The keys are the names of
...@@ -155,7 +169,7 @@ class WaymoMetric(KittiMetric): ...@@ -155,7 +169,7 @@ class WaymoMetric(KittiMetric):
if 'image_sweeps' in info: if 'image_sweeps' in info:
camera_info['image_sweeps'] = info['image_sweeps'] camera_info['image_sweeps'] = info['image_sweeps']
# TODO check if need to modify the sample id # TODO check if need to modify the sample idx
# TODO check when will use it except for evaluation. # TODO check when will use it except for evaluation.
camera_info['sample_idx'] = info['sample_idx'] camera_info['sample_idx'] = info['sample_idx']
new_data_infos.append(camera_info) new_data_infos.append(camera_info)
...@@ -175,6 +189,12 @@ class WaymoMetric(KittiMetric): ...@@ -175,6 +189,12 @@ class WaymoMetric(KittiMetric):
classes=self.classes) classes=self.classes)
metric_dict = {} metric_dict = {}
if self.format_only:
logger.info('results are saved in '
f'{osp.dirname(self.pklfile_prefix)}')
return metric_dict
for metric in self.metrics: for metric in self.metrics:
ap_dict = self.waymo_evaluate( ap_dict = self.waymo_evaluate(
pklfile_prefix, metric=metric, logger=logger) pklfile_prefix, metric=metric, logger=logger)
...@@ -188,19 +208,19 @@ class WaymoMetric(KittiMetric): ...@@ -188,19 +208,19 @@ class WaymoMetric(KittiMetric):
def waymo_evaluate(self, def waymo_evaluate(self,
pklfile_prefix: str, pklfile_prefix: str,
metric: str = None, metric: Optional[str] = None,
logger: MMLogger = None) -> dict: logger: Optional[MMLogger] = None) -> Dict[str, float]:
"""Evaluation in Waymo protocol. """Evaluation in Waymo protocol.
Args: Args:
pklfile_prefix (str): The location that stored the prediction pklfile_prefix (str): The location that stored the prediction
results. results.
metric (str): Metric to be evaluated. Defaults to None. metric (str, optional): Metric to be evaluated. Defaults to None.
logger (MMLogger, optional): Logger used for printing logger (MMLogger, optional): Logger used for printing
related information during evaluation. Default: None. related information during evaluation. Defaults to None.
Returns: Returns:
dict[str, float]: Results of each evaluation metric. Dict[str, float]: Results of each evaluation metric.
""" """
import subprocess import subprocess
...@@ -238,8 +258,6 @@ class WaymoMetric(KittiMetric): ...@@ -238,8 +258,6 @@ class WaymoMetric(KittiMetric):
} }
mAP_splits = ret_texts.split('mAP ') mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ') mAPH_splits = ret_texts.split('mAPH ')
mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ')
for idx, key in enumerate(ap_dict.keys()): for idx, key in enumerate(ap_dict.keys()):
split_idx = int(idx / 2) + 1 split_idx = int(idx / 2) + 1
if idx % 2 == 0: # mAP if idx % 2 == 0: # mAP
...@@ -307,31 +325,32 @@ class WaymoMetric(KittiMetric): ...@@ -307,31 +325,32 @@ class WaymoMetric(KittiMetric):
ap_dict['Cyclist mAPH']) / 3 ap_dict['Cyclist mAPH']) / 3
return ap_dict return ap_dict
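As a side note, a self-contained sketch of the text-splitting pattern used above; the line below only imitates the 'metric [value]' layout of the evaluation output and is fabricated for illustration.
ret_texts = 'OBJECT_TYPE_TYPE_VEHICLE_LEVEL_1: [mAP 0.7] [mAPH 0.69]'  # fabricated sample
mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ')
vehicle_l1_map = float(mAP_splits[1].split(']')[0])    # 0.7
vehicle_l1_maph = float(mAPH_splits[1].split(']')[0])  # 0.69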
def format_results(self, def format_results(
results: List[dict], self,
pklfile_prefix: str = None, results: List[dict],
submission_prefix: str = None, pklfile_prefix: Optional[str] = None,
classes: List[str] = None): submission_prefix: Optional[str] = None,
classes: Optional[List[str]] = None
) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
"""Format the results to bin file. """Format the results to bin file.
Args: Args:
results (list[dict]): Testing results of the results (List[dict]): Testing results of the dataset.
dataset.
pklfile_prefix (str, optional): The prefix of pkl files. It pklfile_prefix (str, optional): The prefix of pkl files. It
includes the file path and the prefix of filename, e.g., includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created. "a/b/prefix". If not specified, a temp file will be created.
Default: None. Defaults to None.
submission_prefix (str, optional): The prefix of submitted files. submission_prefix (str, optional): The prefix of submitted files.
It includes the file path and the prefix of filename, e.g., It includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created. "a/b/prefix". If not specified, a temp file will be created.
Default: None. Defaults to None.
classes (list[String], optional): A list of class name. Defaults classes (List[str], optional): A list of class name.
to None. Defaults to None.
Returns: Returns:
tuple: (result_dict, tmp_dir), result_dict is a dict containing tuple: (result_dict, tmp_dir), result_dict is a dict containing
the formatted result, tmp_dir is the temporary directory created the formatted result, tmp_dir is the temporary directory created
for saving files when pklfile_prefix is not specified. for saving files when pklfile_prefix is not specified.
""" """
waymo_save_tmp_dir = tempfile.TemporaryDirectory() waymo_save_tmp_dir = tempfile.TemporaryDirectory()
waymo_results_save_dir = waymo_save_tmp_dir.name waymo_results_save_dir = waymo_save_tmp_dir.name
...@@ -378,15 +397,16 @@ class WaymoMetric(KittiMetric): ...@@ -378,15 +397,16 @@ class WaymoMetric(KittiMetric):
return final_results, waymo_save_tmp_dir return final_results, waymo_save_tmp_dir
def merge_multi_view_boxes(self, box_dict_per_frame: List[dict], def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
cam0_info: dict): cam0_info: dict) -> dict:
"""Merge bounding boxes predicted from multi-view images. """Merge bounding boxes predicted from multi-view images.
Args: Args:
box_dict_per_frame (list[dict]): The results of prediction box_dict_per_frame (List[dict]): The results of prediction
for each camera. for each camera.
cam2_info (dict): store the sample id for the given frame. cam0_info (dict): Store the sample idx for the given frame.
Returns: Returns:
merged_box_dict (dict), store the merge results dict: Merged results.
""" """
box_dict = dict() box_dict = dict()
# convert list[dict] to dict[list] # convert list[dict] to dict[list]
...@@ -444,27 +464,28 @@ class WaymoMetric(KittiMetric): ...@@ -444,27 +464,28 @@ class WaymoMetric(KittiMetric):
) )
return merged_box_dict return merged_box_dict
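For clarity, a tiny standalone example of the list[dict] to dict[list] regrouping that merge_multi_view_boxes starts with; the per-camera dicts and their keys are hypothetical.
box_dict_per_frame = [dict(bbox_3d=[[0.0] * 7], scores_3d=[0.9]),   # hypothetical CAM_FRONT output
                      dict(bbox_3d=[[1.0] * 7], scores_3d=[0.8])]   # hypothetical CAM_FRONT_LEFT output
box_dict = {key: [frame[key] for frame in box_dict_per_frame]
            for key in box_dict_per_frame[0]}
# box_dict == {'bbox_3d': [[[0.0]*7], [[1.0]*7]], 'scores_3d': [[0.9], [0.8]]}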
def bbox2result_kitti(self, def bbox2result_kitti(
net_outputs: list, self,
sample_id_list: list, net_outputs: List[dict],
class_names: list, sample_idx_list: List[int],
pklfile_prefix: str = None, class_names: List[str],
submission_prefix: str = None): pklfile_prefix: Optional[str] = None,
submission_prefix: Optional[str] = None) -> List[dict]:
"""Convert 3D detection results to kitti format for evaluation and test """Convert 3D detection results to kitti format for evaluation and test
submission. submission.
Args: Args:
net_outputs (list[dict]): List of array storing the net_outputs (List[dict]): List of dict storing the
inferenced bounding boxes and scores. inferenced bounding boxes and scores.
sample_id_list (list[int]): List of input sample id. sample_idx_list (List[int]): List of input sample idx.
class_names (list[String]): A list of class names. class_names (List[str]): A list of class names.
pklfile_prefix (str, optional): The prefix of pkl file. pklfile_prefix (str, optional): The prefix of pkl file.
Defaults to None. Defaults to None.
submission_prefix (str, optional): The prefix of submission file. submission_prefix (str, optional): The prefix of submission file.
Defaults to None. Defaults to None.
Returns: Returns:
list[dict]: A list of dictionaries with the kitti format. List[dict]: A list of dictionaries with the kitti format.
""" """
if submission_prefix is not None: if submission_prefix is not None:
mmengine.mkdir_or_exist(submission_prefix) mmengine.mkdir_or_exist(submission_prefix)
...@@ -473,8 +494,7 @@ class WaymoMetric(KittiMetric): ...@@ -473,8 +494,7 @@ class WaymoMetric(KittiMetric):
print('\nConverting prediction to KITTI format') print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate( for idx, pred_dicts in enumerate(
mmengine.track_iter_progress(net_outputs)): mmengine.track_iter_progress(net_outputs)):
annos = [] sample_idx = sample_idx_list[idx]
sample_idx = sample_id_list[idx]
info = self.data_infos[sample_idx] info = self.data_infos[sample_idx]
if self.load_type == 'mv_image_based': if self.load_type == 'mv_image_based':
...@@ -536,7 +556,6 @@ class WaymoMetric(KittiMetric): ...@@ -536,7 +556,6 @@ class WaymoMetric(KittiMetric):
anno['score'].append(score) anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()} anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
else: else:
anno = { anno = {
'name': np.array([]), 'name': np.array([]),
...@@ -549,7 +568,6 @@ class WaymoMetric(KittiMetric): ...@@ -549,7 +568,6 @@ class WaymoMetric(KittiMetric):
'rotation_y': np.array([]), 'rotation_y': np.array([]),
'score': np.array([]), 'score': np.array([]),
} }
annos.append(anno)
if submission_prefix is not None: if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:06d}.txt' curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
...@@ -577,10 +595,10 @@ class WaymoMetric(KittiMetric): ...@@ -577,10 +595,10 @@ class WaymoMetric(KittiMetric):
# In waymo validation sample_idx in prediction is 000xxx # In waymo validation sample_idx in prediction is 000xxx
# but in info file it is 1000xxx # but in info file it is 1000xxx
save_sample_idx = box_dict['sample_idx'] save_sample_idx = box_dict['sample_idx']
annos[-1]['sample_idx'] = np.array( anno['sample_idx'] = np.array(
[save_sample_idx] * len(annos[-1]['score']), dtype=np.int64) [save_sample_idx] * len(anno['score']), dtype=np.int64)
det_annos += annos det_annos.append(anno)
if pklfile_prefix is not None: if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')): if not pklfile_prefix.endswith(('.pkl', '.pickle')):
...@@ -592,16 +610,16 @@ class WaymoMetric(KittiMetric): ...@@ -592,16 +610,16 @@ class WaymoMetric(KittiMetric):
return det_annos return det_annos
def convert_valid_bboxes(self, box_dict: dict, info: dict): def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
"""Convert the predicted boxes into valid ones. Should handle the """Convert the predicted boxes into valid ones. Should handle the
load_type (frame_based, mv_image_based, fov_image_based) separately. load_type (frame_based, mv_image_based, fov_image_based) separately.
Args: Args:
box_dict (dict): Box dictionaries to be converted. box_dict (dict): Box dictionaries to be converted.
- bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes. - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
- scores_3d (torch.Tensor): Scores of boxes. - scores_3d (Tensor): Scores of boxes.
- labels_3d (torch.Tensor): Class labels of boxes. - labels_3d (Tensor): Class labels of boxes.
info (dict): Data info. info (dict): Data info.
Returns: Returns:
...@@ -609,9 +627,9 @@ class WaymoMetric(KittiMetric): ...@@ -609,9 +627,9 @@ class WaymoMetric(KittiMetric):
- bbox (np.ndarray): 2D bounding boxes. - bbox (np.ndarray): 2D bounding boxes.
- box3d_camera (np.ndarray): 3D bounding boxes in - box3d_camera (np.ndarray): 3D bounding boxes in
camera coordinate. camera coordinate.
- box3d_lidar (np.ndarray): 3D bounding boxes in - box3d_lidar (np.ndarray): 3D bounding boxes in
LiDAR coordinate. LiDAR coordinate.
- scores (np.ndarray): Scores of boxes. - scores (np.ndarray): Scores of boxes.
- label_preds (np.ndarray): Class label predictions. - label_preds (np.ndarray): Class label predictions.
- sample_idx (int): Sample index. - sample_idx (int): Sample index.
...@@ -673,7 +691,7 @@ class WaymoMetric(KittiMetric): ...@@ -673,7 +691,7 @@ class WaymoMetric(KittiMetric):
valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) & valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
(box_preds_lidar.center < limit_range[3:])) (box_preds_lidar.center < limit_range[3:]))
valid_inds = valid_pcd_inds.all(-1) valid_inds = valid_pcd_inds.all(-1)
if self.load_type in ['mv_image_based', 'fov_image_based']: elif self.load_type in ['mv_image_based', 'fov_image_based']:
valid_inds = valid_cam_inds valid_inds = valid_cam_inds
if valid_inds.sum() > 0: if valid_inds.sum() > 0:
......
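Finally, a minimal numpy sketch of the point-cloud-range check applied in convert_valid_bboxes above; the box centers below are made up.
import numpy as np

limit_range = np.array([-85, -85, -5, 85, 85, 5], dtype=np.float32)
centers = np.array([[10.0, 3.0, 0.5],     # inside the evaluation range
                    [200.0, 0.0, 0.0]])   # outside the evaluation range
valid_pcd_inds = (centers > limit_range[:3]) & (centers < limit_range[3:])
valid_inds = valid_pcd_inds.all(-1)       # array([ True, False])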