[Fix]: Fix nuscenes dataset training (#1882)

* fix nuscenes training bug * update kitti_dataset docstring * update nuscenes_dataset docstring * fix lint and modify docstring

[Fix]: Fix nuscenes dataset training (#1882)
* fix nuscenes training bug * update kitti_dataset docstring * update nuscenes_dataset docstring * fix lint and modify docstring
74117ce4 · Xiangxu-0103 · GitHub · f3898480 · 74117ce4 · 74117ce4
Unverified Commit 74117ce4 authored Oct 08, 2022 by Xiangxu-0103 Committed by GitHub Oct 08, 2022
8 changed files
--- a/mmdet3d/datasets/det3d_dataset.py
+++ b/mmdet3d/datasets/det3d_dataset.py
@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset):
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict, optional): Prefix for training data. Defaults to
-            dict(pts='velodyne', img="").
+            dict(pts='velodyne', img='').
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
-            as input, it usually has following keys.
+            as input, it usually has following keys:

                - use_camera: bool
                - use_lidar: bool
@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset):
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR'. Available options includes
+            Defaults to 'LiDAR'. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates, usually for
              outdoor point cloud 3d detection.
@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset):
            - 'Camera': Box in camera coordinates, usually
              for vision-based 3d detection.

-        filter_empty_gt (bool): Whether to filter the data with
+        filter_empty_gt (bool, optional): Whether to filter the data with
            empty GT. Defaults to True.
-        test_mode (bool): Whether the dataset is in test mode.
+        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
-        load_eval_anns (bool): Whether to load annotations
-            in test_mode, the annotation will be save in
-            `eval_ann_infos`, which can be use in Evaluator.
-        file_client_args (dict): Configuration of file client.
-            Defaults to `dict(backend='disk')`.
+        load_eval_anns (bool, optional): Whether to load annotations
+            in test_mode, the annotation will be saved in `eval_ann_infos`,
+            which can be used in Evaluator. Defaults to True.
+        file_client_args (dict, optional): Configuration of file client.
+            Defaults to dict(backend='disk').
    """

    def __init__(self,
@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset):
                 test_mode: bool = False,
                 load_eval_anns=True,
                 file_client_args: dict = dict(backend='disk'),
-                 **kwargs):
+                 **kwargs) -> None:
        # init file client
        self.file_client = mmengine.FileClient(**file_client_args)
        self.filter_empty_gt = filter_empty_gt
@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset):
        self.metainfo['box_type_3d'] = box_type_3d
        self.metainfo['label_mapping'] = self.label_mapping

-    def _remove_dontcare(self, ann_info):
+    def _remove_dontcare(self, ann_info: dict) -> dict:
        """Remove annotations that do not need to be cared.

        -1 indicate dontcare in MMDet3d.
@@ -291,7 +291,7 @@ class Det3DDataset(BaseDataset):

        return info

-    def prepare_data(self, index):
+    def prepare_data(self, index: int) -> Optional[dict]:
        """Data preparation for both training and testing stage.

        Called by `__getitem__`  of dataset.
@@ -300,7 +300,7 @@ class Det3DDataset(BaseDataset):
            index (int): Index for accessing the target data.

        Returns:
-            dict: Data dict of the corresponding index.
+            dict | None: Data dict of the corresponding index.
        """
        input_dict = self.get_data_info(index)


--- a/mmdet3d/datasets/kitti_dataset.py
+++ b/mmdet3d/datasets/kitti_dataset.py
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Callable, List, Optional, Union
+from typing import Callable, List, Union

 import numpy as np

@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset):
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to `dict(use_lidar=True)`.
-
+        default_cam_key (str, optional): The default camera name adopted.
+            Defaults to 'CAM2'.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR' in this dataset. Available options includes
+            Defaults to 'LiDAR' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset):
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
-        pcd_limit_range (list, optional): The range of point cloud used to
-            filter invalid predicted boxes.
-            Default: [0, -40, -3, 70.4, 40, 0.0].
+        pcd_limit_range (list[float], optional): The range of point cloud
+            used to filter invalid predicted boxes.
+            Defaults to [0, -40, -3, 70.4, 40, 0.0].
    """
    # TODO: use full classes of kitti
    METAINFO = {
@@ -49,13 +50,13 @@ class KittiDataset(Det3DDataset):
                 data_root: str,
                 ann_file: str,
                 pipeline: List[Union[dict, Callable]] = [],
-                 modality: Optional[dict] = dict(use_lidar=True),
+                 modality: dict = dict(use_lidar=True),
                 default_cam_key: str = 'CAM2',
                 box_type_3d: str = 'LiDAR',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
-                 **kwargs):
+                 **kwargs) -> None:

        self.pcd_limit_range = pcd_limit_range
        super().__init__(
@@ -111,7 +112,7 @@ class KittiDataset(Det3DDataset):

        return info

-    def parse_ann_info(self, info):
+    def parse_ann_info(self, info: dict) -> dict:
        """Get annotation info according to the given index.

        Args:

--- a/mmdet3d/datasets/lyft_dataset.py
+++ b/mmdet3d/datasets/lyft_dataset.py
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List
+from typing import Callable, List, Union

 import numpy as np

@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset):
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
-            as input. Defaults to None.
+            as input. Defaults to dict(use_camera=False, use_lidar=True).
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR' in this dataset. Available options includes
+            Defaults to 'LiDAR' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool): Whether to filter empty GT.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
-        test_mode (bool): Whether the dataset is in test mode.
+        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """

@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset):
    def __init__(self,
                 data_root: str,
                 ann_file: str,
-                 pipeline: List[dict] = None,
-                 modality: Dict = dict(use_camera=False, use_lidar=True),
+                 pipeline: List[Union[dict, Callable]] = [],
+                 modality: dict = dict(use_camera=False, use_lidar=True),
                 box_type_3d: str = 'LiDAR',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,

--- a/mmdet3d/datasets/nuscenes_dataset.py
+++ b/mmdet3d/datasets/nuscenes_dataset.py
 # Copyright (c) OpenMMLab. All rights reserved.
 from os import path as osp
-from typing import Dict, List
+from typing import Callable, List, Union

 import numpy as np

@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset):
    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
+        task (str, optional): Detection task. Defaults to '3d'.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR' in this dataset. Available options includes.
+            Defaults to 'LiDAR' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
        modality (dict, optional): Modality to specify the sensor data used
-            as input. Defaults to dict(use_camera=False,use_lidar=True).
-        filter_empty_gt (bool): Whether to filter empty GT.
+            as input. Defaults to dict(use_camera=False, use_lidar=True).
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
-        test_mode (bool): Whether the dataset is in test mode.
+        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
-        with_velocity (bool): Whether include velocity prediction
+        with_velocity (bool, optional): Whether to include velocity prediction
            into the experiments. Defaults to True.
-        use_valid_flag (bool): Whether to use `use_valid_flag` key
+        use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
            in the info file as mask to filter gt_boxes and gt_names.
            Defaults to False.
    """
@@ -56,9 +57,9 @@ class NuScenesDataset(Det3DDataset):
                 data_root: str,
                 ann_file: str,
                 task: str = '3d',
-                 pipeline: List[dict] = None,
+                 pipeline: List[Union[dict, Callable]] = [],
                 box_type_3d: str = 'LiDAR',
-                 modality: Dict = dict(
+                 modality: dict = dict(
                     use_camera=False,
                     use_lidar=True,
                 ),
@@ -66,7 +67,7 @@ class NuScenesDataset(Det3DDataset):
                 test_mode: bool = False,
                 with_velocity: bool = True,
                 use_valid_flag: bool = False,
-                 **kwargs):
+                 **kwargs) -> None:
        self.use_valid_flag = use_valid_flag
        self.with_velocity = with_velocity

@@ -85,7 +86,7 @@ class NuScenesDataset(Det3DDataset):
            test_mode=test_mode,
            **kwargs)

-    def _filter_with_mask(self, ann_info):
+    def _filter_with_mask(self, ann_info: dict) -> dict:
        """Remove annotations that do not need to be cared.

        Args:

--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset):
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'Depth' in this dataset. Available options includes
+            Defaults to 'Depth' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset):
    def __init__(self,
                 data_root: str,
                 ann_file: str,
-                 metainfo: dict = None,
+                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points',
                     pts_instance_mask='instance_mask',
                     pts_semantic_mask='semantic_mask'),
                 pipeline: List[Union[dict, Callable]] = [],
-                 modality=dict(use_camera=False, use_lidar=True),
+                 modality: dict = dict(use_camera=False, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset):
        assert self.modality['use_camera'] or self.modality['use_lidar']

    @staticmethod
-    def _get_axis_align_matrix(info: dict) -> dict:
+    def _get_axis_align_matrix(info: dict) -> np.ndarray:
        """Get axis_align_matrix from info. If not exist, return identity mat.

        Args:

--- a/mmdet3d/datasets/sunrgbd_dataset.py
+++ b/mmdet3d/datasets/sunrgbd_dataset.py
@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset):
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
-        data_prefix (dict): Prefix for data. Defaults to
-            `dict(pts='points',img='sunrgbd_trainval')`.
+        data_prefix (dict, optional): Prefix for data. Defaults to
+            dict(pts='points', img='sunrgbd_trainval').
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
-            as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
-        default_cam_key (str): The default camera name adopted.
-            Defaults to "CAM0".
+            as input. Defaults to dict(use_camera=True, use_lidar=True).
+        default_cam_key (str, optional): The default camera name adopted.
+            Defaults to 'CAM0'.
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'Depth' in this dataset. Available options includes
+            Defaults to 'Depth' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool): Whether to filter empty GT.
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
-        test_mode (bool): Whether the dataset is in test mode.
+        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {

--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
@@ -23,8 +23,8 @@ class WaymoDataset(KittiDataset):
    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
-        data_prefix (list[dict]): data prefix for point cloud and
-            camera data dict, default to dict(
+        data_prefix (dict): data prefix for point cloud and
+            camera data dict. Defaults to dict(
                                    pts='velodyne',
                                    CAM_FRONT='image_0',
                                    CAM_FRONT_RIGHT='image_1',
@@ -34,13 +34,14 @@ class WaymoDataset(KittiDataset):
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
-            as input. Defaults to `dict(use_lidar=True)`.
+            as input. Defaults to dict(use_lidar=True).
        default_cam_key (str, optional): Default camera key for lidar2img
-            association.
+            association. Defaults to 'CAM_FRONT'.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR' in this dataset. Available options includes
+            Defaults to 'LiDAR' in this dataset. Available options includes:
+
            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
@@ -48,16 +49,18 @@ class WaymoDataset(KittiDataset):
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
-        pcd_limit_range (list, optional): The range of point cloud used to
-            filter invalid predicted boxes.
-            Default: [-85, -85, -5, 85, 85, 5].
+        pcd_limit_range (list[float], optional): The range of point cloud
+            used to filter invalid predicted boxes.
+            Defaults to [-85, -85, -5, 85, 85, 5].
        cam_sync_instances (bool, optional): If use the camera sync label
-            supported from waymo version 1.3.1.
+            supported from waymo version 1.3.1. Defaults to False.
        load_interval (int, optional): load frame interval.
+            Defaults to 1.
        task (str, optional): task for 3D detection (lidar, mono3d).
            lidar: take all the ground trurh in the frame.
            mono3d: take the groundtruth that can be seen in the cam.
-        max_sweeps (int, optional): max sweep for each frame.
+            Defaults to 'lidar'.
+        max_sweeps (int, optional): max sweep for each frame. Defaults to 0.
    """
    METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')}


--- a/tools/dataset_converters/update_infos_to_v2.py
+++ b/tools/dataset_converters/update_infos_to_v2.py
@@ -284,6 +284,7 @@ def update_nuscenes_infos(pkl_path, out_dir):
        temp_data_info['ego2global'] = convert_quaternion_to_matrix(
            ori_info_dict['ego2global_rotation'],
            ori_info_dict['ego2global_translation'])
+        temp_data_info['lidar_points']['num_pts_feats'] = 5
        temp_data_info['lidar_points']['lidar_path'] = ori_info_dict[
            'lidar_path'].split('/')[-1]
        temp_data_info['lidar_points'][