[Fix]: fix semantic segmentation related bugs (#1909)

delete whitespace update docs remove unnecessary optional docs update docs add mmengine assertion add docstring fix mminstall update mmengine version fix [Fix]: fix semantic segmentation related bugs (#1909) fix semantic seg fix lint remove unused imports fix update pointnet2-s3dis config update data_list according to scene_idxs remove useless function fix bug lack `eval_ann_info` during evaluation fix bug update doc fix lint update docs Update det3d_dataset.py update docstrings update docs fix lint update docs fix fix fix lint

[Fix]: fix semantic segmentation related bugs (#1909)
delete whitespace update docs remove unnecessary optional docs update docs add mmengine assertion add docstring fix mminstall update mmengine version fix [Fix]: fix semantic segmentation related bugs (#1909) fix semantic seg fix lint remove unused imports fix update pointnet2-s3dis config update data_list according to scene_idxs remove useless function fix bug lack `eval_ann_info` during evaluation fix bug update doc fix lint update docs Update det3d_dataset.py update docstrings update docs fix lint update docs fix fix fix lint
adb17824 · xiangxu-0103 · ZwwWayne · b37dc416 · adb17824 · adb17824
Commit adb17824 authored Oct 14, 2022 by xiangxu-0103 Committed by ZwwWayne Dec 03, 2022
16 changed files
--- a/mmdet3d/datasets/s3dis_dataset.py
+++ b/mmdet3d/datasets/s3dis_dataset.py
@@ -166,24 +166,25 @@ class _S3DISSegDataset(Seg3DDataset):
    wrapper to concat all the provided data in different areas.
    Args:
-        data_root (str): Path of dataset root.
+        data_root (str, optional): Path of dataset root. Defaults to None.
-        ann_file (str): Path of annotation file.
+        ann_file (str): Path of annotation file. Defaults to ''.
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        metainfo (dict, optional): Meta information for dataset, such as class
-            Defaults to None.
+            information. Defaults to None.
-        classes (tuple[str], optional): Classes used in the dataset.
+        data_prefix (dict): Prefix for training data. Defaults to
-            Defaults to None.
+            dict(pts='points', img='', instance_mask='', semantic_mask='').
-        palette (list[list[int]], optional): The palette of segmentation map.
+        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        modality (dict, optional): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used as input.
-            as input. Defaults to None.
+            Defaults to dict(use_lidar=True, use_camera=False).
-        test_mode (bool, optional): Whether the dataset is in test mode.
-            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
-            unannotated points. If None is given, set to len(self.CLASSES).
+            unannotated points. If None is given, set to len(self.CLASSES) to
+            be consistent with PointSegClassMapping function in pipeline.
            Defaults to None.
        scene_idxs (np.ndarray | str, optional): Precomputed index to load
            data. For scenes with many points, we may sample it several times.
            Defaults to None.
+        test_mode (bool): Whether the dataset is in test mode.
+            Defaults to False.
    """
    METAINFO = {
        'CLASSES':
@@ -207,9 +208,9 @@ class _S3DISSegDataset(Seg3DDataset):
                     pts='points', img='', instance_mask='', semantic_mask=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
-                 ignore_index=None,
+                 ignore_index: Optional[int] = None,
-                 scene_idxs=None,
+                 scene_idxs: Optional[Union[np.ndarray, str]] = None,
-                 test_mode=False,
+                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
@@ -250,37 +251,40 @@ class S3DISSegDataset(_S3DISSegDataset):
    data downloading.
    Args:
-        data_root (str): Path of dataset root.
+        data_root (str, optional): Path of dataset root. Defaults to None.
        ann_files (list[str]): Path of several annotation files.
-        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to ''.
-            Defaults to None.
+        metainfo (dict, optional): Meta information for dataset, such as class
-        classes (tuple[str], optional): Classes used in the dataset.
+            information. Defaults to None.
-            Defaults to None.
+        data_prefix (dict): Prefix for training data. Defaults to
-        palette (list[list[int]], optional): The palette of segmentation map.
+            dict(pts='points', img='', instance_mask='', semantic_mask='').
-            Defaults to None.
+        pipeline (list[dict]): Pipeline used for data processing.
-        modality (dict, optional): Modality to specify the sensor data used
+            Defaults to [].
-            as input. Defaults to None.
+        modality (dict): Modality to specify the sensor data used as input.
-        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to dict(use_lidar=True, use_camera=False).
-            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
-            unannotated points. If None is given, set to len(self.CLASSES).
+            unannotated points. If None is given, set to len(self.CLASSES) to
+            be consistent with PointSegClassMapping function in pipeline.
            Defaults to None.
        scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index
-            to load data. For scenes with many points, we may sample it several
+            to load data. For scenes with many points, we may sample it
-            times. Defaults to None.
+            several times. Defaults to None.
+        test_mode (bool): Whether the dataset is in test mode.
+            Defaults to False.
    """
    def __init__(self,
                 data_root: Optional[str] = None,
-                 ann_files: str = '',
+                 ann_files: List[str] = '',
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='', instance_mask='', semantic_mask=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
-                 ignore_index=None,
+                 ignore_index: Optional[int] = None,
-                 scene_idxs=None,
+                 scene_idxs: Optional[Union[List[np.ndarray],
-                 test_mode=False,
+                                            List[str]]] = None,
+                 test_mode: bool = False,
                 **kwargs) -> None:
        # make sure that ann_files and scene_idxs have same length
@@ -318,13 +322,12 @@ class S3DISSegDataset(_S3DISSegDataset):
        # data_list and scene_idxs need to be concat
        self.concat_data_list([dst.data_list for dst in datasets])
-        self.concat_scene_idxs([dst.scene_idxs for dst in datasets])
        # set group flag for the sampler
        if not self.test_mode:
            self._set_group_flag()
-    def concat_data_list(self, data_lists):
+    def concat_data_list(self, data_lists: List[List[dict]]) -> List[dict]:
        """Concat data_list from several datasets to form self.data_list.
        Args:
@@ -334,21 +337,6 @@ class S3DISSegDataset(_S3DISSegDataset):
            data for data_list in data_lists for data in data_list
        ]
-    def concat_scene_idxs(self, scene_idxs):
-        """Concat scene_idxs from several datasets to form self.scene_idxs.
-        Needs to manually add offset to scene_idxs[1, 2, ...].
-        Args:
-            scene_idxs (list[np.ndarray])
-        """
-        self.scene_idxs = np.array([], dtype=np.int32)
-        offset = 0
-        for one_scene_idxs in scene_idxs:
-            self.scene_idxs = np.concatenate(
-                [self.scene_idxs, one_scene_idxs + offset]).astype(np.int32)
-            offset = np.unique(self.scene_idxs).max() + 1
    @staticmethod
    def _duplicate_to_list(x, num):
        """Repeat x `num` times to form a list."""

--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -26,13 +26,13 @@ class ScanNetDataset(Det3DDataset):
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict): Prefix for data. Defaults to
-            `dict(pts='points',
+            dict(pts='points',
                 pts_instance_mask='instance_mask',
-                pts_semantic_mask='semantic_mask')`.
+                 pts_semantic_mask='semantic_mask').
        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        modality (dict): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used as input.
-            as input. Defaults to None.
+            Defaults to dict(use_camera=False, use_lidar=True).
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
@@ -41,8 +41,10 @@ class ScanNetDataset(Det3DDataset):
            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool): Whether to filter empty GT.
+        filter_empty_gt (bool): Whether to filter the data with empty GT.
-            Defaults to True.
+            If it's set to be True, the example with empty annotations after
+            data pipeline will be dropped and a random example will be chosen
+            in `__getitem__`. Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
    """
@@ -71,7 +73,7 @@ class ScanNetDataset(Det3DDataset):
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
-                 **kwargs):
+                 **kwargs) -> None:
        # construct seg_label_mapping for semantic mask
        seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
@@ -128,8 +130,8 @@ class ScanNetDataset(Det3DDataset):
            info (dict): Raw info dict.
        Returns:
-            dict: Data information that will be passed to the data
+            dict: Has `ann_info` in training stage. And
-            preprocessing transforms. It includes the following keys:
+            all paths have been converted to absolute paths.
        """
        info['axis_align_matrix'] = self._get_axis_align_matrix(info)
        info['pts_instance_mask_path'] = osp.join(
@@ -146,13 +148,13 @@ class ScanNetDataset(Det3DDataset):
        return info
    def parse_ann_info(self, info: dict) -> dict:
-        """Process the `instances` in data info to `ann_info`
+        """Process the `instances` in data info to `ann_info`.
        Args:
            info (dict): Info dict.
        Returns:
-            dict: Processed `ann_info`
+            dict: Processed `ann_info`.
        """
        ann_info = super().parse_ann_info(info)
        # empty gt
@@ -181,24 +183,25 @@ class ScanNetSegDataset(Seg3DDataset):
    for data downloading.
    Args:
-        data_root (str): Path of dataset root.
+        data_root (str, optional): Path of dataset root. Defaults to None.
-        ann_file (str): Path of annotation file.
+        ann_file (str): Path of annotation file. Defaults to ''.
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        classes (tuple[str], optional): Classes used in the dataset.
+        metainfo (dict, optional): Meta information for dataset, such as class
-            Defaults to None.
+            information. Defaults to None.
-        palette (list[list[int]], optional): The palette of segmentation map.
+        data_prefix (dict): Prefix for training data. Defaults to
-            Defaults to None.
+            dict(pts='points', img='', instance_mask='', semantic_mask='').
-        modality (dict, optional): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used as input.
-            as input. Defaults to None.
+            Defaults to dict(use_lidar=True, use_camera=False).
-        test_mode (bool, optional): Whether the dataset is in test mode.
-            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
-            unannotated points. If None is given, set to len(self.CLASSES).
+            unannotated points. If None is given, set to len(self.CLASSES) to
+            be consistent with PointSegClassMapping function in pipeline.
            Defaults to None.
        scene_idxs (np.ndarray | str, optional): Precomputed index to load
            data. For scenes with many points, we may sample it several times.
            Defaults to None.
+        test_mode (bool): Whether the dataset is in test mode.
+            Defaults to False.
    """
    METAINFO = {
        'CLASSES':
@@ -242,9 +245,9 @@ class ScanNetSegDataset(Seg3DDataset):
                     pts='points', img='', instance_mask='', semantic_mask=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
-                 ignore_index=None,
+                 ignore_index: Optional[int] = None,
-                 scene_idxs=None,
+                 scene_idxs: Optional[Union[np.ndarray, str]] = None,
-                 test_mode=False,
+                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
@@ -315,10 +318,10 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
                     pts='points', img='', instance_mask='', semantic_mask=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
-                 test_mode=False,
+                 test_mode: bool = False,
-                 ignore_index=None,
+                 ignore_index: Optional[int] = None,
-                 scene_idxs=None,
+                 scene_idxs: Optional[Union[np.ndarray, str]] = None,
-                 file_client_args=dict(backend='disk'),
+                 file_client_args: dict = dict(backend='disk'),
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,

--- a/mmdet3d/datasets/seg3d_dataset.py
+++ b/mmdet3d/datasets/seg3d_dataset.py
@@ -16,24 +16,20 @@ class Seg3DDataset(BaseDataset):
    This is the base dataset of ScanNet, S3DIS and SemanticKITTI dataset.
    Args:
-        data_root (str): Path of dataset root.
+        data_root (str, optional): Path of dataset root. Defaults to None.
-        ann_file (str): Path of annotation file.
+        ann_file (str): Path of annotation file. Defaults to ''.
-        pipeline (list[dict], optional): Pipeline used for data processing.
-            Defaults to None.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
-        data_prefix (dict, optional): Prefix for training data. Defaults to
+        data_prefix (dict): Prefix for training data. Defaults to
            dict(pts='velodyne', img='', instance_mask='', semantic_mask='').
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        modality (dict, optional): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used
-            as input, it usually has following keys.
+            as input, it usually has following keys:
                - use_camera: bool
                - use_lidar: bool
-            Defaults to `dict(use_lidar=True, use_camera=False)`
+            Defaults to dict(use_lidar=True, use_camera=False).
-        test_mode (bool, optional): Whether the dataset is in test mode.
-            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. If None is given, set to len(self.CLASSES) to
            be consistent with PointSegClassMapping function in pipeline.
@@ -41,11 +37,13 @@ class Seg3DDataset(BaseDataset):
        scene_idxs (np.ndarray | str, optional): Precomputed index to load
            data. For scenes with many points, we may sample it several times.
            Defaults to None.
-        load_eval_anns (bool): Whether to load annotations
+        test_mode (bool): Whether the dataset is in test mode.
-            in test_mode, the annotation will be save in
+            Defaults to False.
-            `eval_ann_infos`, which can be use in Evaluator.
+        load_eval_anns (bool): Whether to load annotations in test_mode,
+            the annotation will be saved in `eval_ann_infos`, which can be
+            used in Evaluator. Defaults to True.
        file_client_args (dict): Configuration of file client.
-            Defaults to `dict(backend='disk')`.
+            Defaults to dict(backend='disk').
    """
    METAINFO = {
        'CLASSES': None,  # names of all classes data used for the task
@@ -66,7 +64,7 @@ class Seg3DDataset(BaseDataset):
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
                 ignore_index: Optional[int] = None,
-                 scene_idxs: Optional[str] = None,
+                 scene_idxs: Optional[Union[str, np.ndarray]] = None,
                 test_mode: bool = False,
                 load_eval_anns: bool = True,
                 file_client_args: dict = dict(backend='disk'),
@@ -121,6 +119,7 @@ class Seg3DDataset(BaseDataset):
        self.metainfo['seg_label_mapping'] = self.seg_label_mapping
        self.scene_idxs = self.get_scene_idxs(scene_idxs)
+        self.data_list = [self.data_list[i] for i in self.scene_idxs]
        # set group flag for the sampler
        if not self.test_mode:
@@ -141,7 +140,6 @@ class Seg3DDataset(BaseDataset):
            new_classes (list, tuple, optional): The new classes name from
                metainfo. Default to None.
        Returns:
            tuple: The mapping from old classes in cls.METAINFO to
            new classes in metainfo

--- a/mmdet3d/datasets/semantickitti_dataset.py
+++ b/mmdet3d/datasets/semantickitti_dataset.py
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Callable, List, Optional, Union
+import numpy as np
 from mmdet3d.registry import DATASETS
 from .seg3d_dataset import Seg3DDataset
@@ -14,26 +16,28 @@ class SemanticKITTIDataset(Seg3DDataset):
    for data downloading
    Args:
-        data_root (str): Path of dataset root.
+        data_root (str, optional): Path of dataset root. Defaults to None.
-        ann_file (str): Path of annotation file.
+        ann_file (str): Path of annotation file. Defaults to ''.
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        metainfo (dict, optional): Meta information for dataset, such as class
+            information. Defaults to None.
+        data_prefix (dict): Prefix for training data. Defaults to
+            dict(pts='points', img='', instance_mask='', semantic_mask='').
+        pipeline (list[dict]): Pipeline used for data processing.
+            Defaults to [].
+        modality (dict): Modality to specify the sensor data used as input,
+            it usually has following keys:
+                - use_camera: bool
+                - use_lidar: bool
+            Defaults to dict(use_lidar=True, use_camera=False).
+        ignore_index (int, optional): The label index to be ignored, e.g.
+            unannotated points. If None is given, set to len(self.CLASSES) to
+            be consistent with PointSegClassMapping function in pipeline.
            Defaults to None.
-        classes (tuple[str], optional): Classes used in the dataset.
+        scene_idxs (np.ndarray | str, optional): Precomputed index to load
+            data. For scenes with many points, we may sample it several times.
            Defaults to None.
-        modality (dict, optional): Modality to specify the sensor data used
+        test_mode (bool): Whether the dataset is in test mode.
-            as input. Defaults to None.
-        box_type_3d (str, optional): NO 3D box for this dataset.
-            You can choose any type
-            Based on the `box_type_3d`, the dataset will encapsulate the box
-            to its original format then converted them to `box_type_3d`.
-            Defaults to 'LiDAR' in this dataset. Available options includes
-            - 'LiDAR': Box in LiDAR coordinates.
-            - 'Depth': Box in depth coordinates, usually for indoor dataset.
-            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool, optional): Whether to filter empty GT.
-            Defaults to True.
-        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {
@@ -55,9 +59,9 @@ class SemanticKITTIDataset(Seg3DDataset):
                     pts='points', img='', instance_mask='', semantic_mask=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
-                 ignore_index=None,
+                 ignore_index: Optional[int] = None,
-                 scene_idxs=None,
+                 scene_idxs: Optional[Union[str, np.ndarray]] = None,
-                 test_mode=False,
+                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(

--- a/mmdet3d/datasets/sunrgbd_dataset.py
+++ b/mmdet3d/datasets/sunrgbd_dataset.py
@@ -24,13 +24,13 @@ class SUNRGBDDataset(Det3DDataset):
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
-        data_prefix (dict, optiona;): Prefix for data. Defaults to
+        data_prefix (dict): Prefix for data. Defaults to
            dict(pts='points', img='sunrgbd_trainval/image').
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        modality (dict, optional): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used as input.
-            as input. Defaults to dict(use_camera=True, use_lidar=True).
+            Defaults to dict(use_camera=True, use_lidar=True).
-        default_cam_key (str, optional): The default camera name adopted.
+        default_cam_key (str): The default camera name adopted.
            Defaults to 'CAM0'.
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
@@ -40,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool, optional): Whether to filter empty GT.
+        filter_empty_gt (bool): Whether to filter empty GT.
            Defaults to True.
-        test_mode (bool, optional): Whether the dataset is in test mode.
+        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {
@@ -58,11 +58,11 @@ class SUNRGBDDataset(Det3DDataset):
                     pts='points', img='sunrgbd_trainval/image'),
                 pipeline: List[Union[dict, Callable]] = [],
                 default_cam_key: str = 'CAM0',
-                 modality=dict(use_camera=True, use_lidar=True),
+                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
-                 **kwargs):
+                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
@@ -121,7 +121,7 @@ class SUNRGBDDataset(Det3DDataset):
        return info
    def parse_ann_info(self, info: dict) -> dict:
-        """Process the `instances` in data info to `ann_info`
+        """Process the `instances` in data info to `ann_info`.
        Args:
            info (dict): Info dict.

--- a/mmdet3d/datasets/transforms/dbsampler.py
+++ b/mmdet3d/datasets/transforms/dbsampler.py
@@ -18,9 +18,8 @@ class BatchSampler:
        sample_list (list[dict]): List of samples.
        name (str, optional): The category of samples. Defaults to None.
        epoch (int, optional): Sampling epoch. Defaults to None.
-        shuffle (bool, optional): Whether to shuffle indices.
+        shuffle (bool): Whether to shuffle indices. Defaults to False.
-            Defaults to False.
+        drop_reminder (bool): Drop reminder. Defaults to False.
-        drop_reminder (bool, optional): Drop reminder. Defaults to False.
    """
    def __init__(self,
@@ -90,12 +89,11 @@ class DataBaseSampler(object):
        prepare (dict): Name of preparation functions and the input value.
        sample_groups (dict): Sampled classes and numbers.
        classes (list[str], optional): List of classes. Defaults to None.
-        points_loader(dict, optional): Config of points loader. Defaults to
+        points_loader (dict): Config of points loader. Defaults to
            dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3]).
-        file_client_args (dict, optional): Config dict of file clients,
+        file_client_args (dict): Arguments to instantiate a FileClient.
-            refer to
+            See :class:`mmengine.fileio.FileClient` for details.
-            https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
+            Defaults to dict(backend='disk').
-            for more details. Defaults to dict(backend='disk').
    """
    def __init__(

--- a/mmdet3d/datasets/transforms/formating.py
+++ b/mmdet3d/datasets/transforms/formating.py
@@ -102,7 +102,7 @@ class Pack3DDetInputs(BaseTransform):
                - points
                - img
-            - 'data_samples' (obj:`Det3DDataSample`): The annotation info of
+            - 'data_samples' (:obj:`Det3DDataSample`): The annotation info of
              the sample.
        """
        # augtest

--- a/mmdet3d/datasets/transforms/loading.py
+++ b/mmdet3d/datasets/transforms/loading.py
@@ -20,19 +20,17 @@ class LoadMultiViewImageFromFiles(BaseTransform):
    Expects results['img_filename'] to be a list of filenames.
    Args:
-        to_float32 (bool, optional): Whether to convert the img to float32.
+        to_float32 (bool): Whether to convert the img to float32.
            Defaults to False.
-        color_type (str, optional): Color type of the file.
+        color_type (str): Color type of the file. Defaults to 'unchanged'.
-            Defaults to 'unchanged'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
-        file_client_args (dict): Config dict of file clients,
+            See :class:`mmengine.fileio.FileClient` for details.
-            refer to
+            Defaults to dict(backend='disk').
-            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+        num_views (int): Number of views in a frame. Defaults to 5.
-            for more details. Defaults to dict(backend='disk').
+        num_ref_frames (int): Number of frames in loading. Defaults to -1.
-        num_views (int): num of view in a frame. Default to 5.
+        test_mode (bool): Whether loading is in test mode. Defaults to False.
-        num_ref_frames (int): num of frame in loading. Default to -1.
+        set_default_scale (bool): Whether to set default scale.
-        test_mode (bool): Whether is test mode in loading. Default to False.
+            Defaults to True.
-        set_default_scale (bool): Whether to set default scale. Default to
-        True.
    """
    def __init__(self,
@@ -210,7 +208,7 @@ class LoadMultiViewImageFromFiles(BaseTransform):
        results['num_ref_frames'] = self.num_ref_frames
        return results
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f'(to_float32={self.to_float32}, '
@@ -276,22 +274,17 @@ class LoadPointsFromMultiSweeps(BaseTransform):
    This is usually used for nuScenes dataset to utilize previous sweeps.
    Args:
-        sweeps_num (int, optional): Number of sweeps. Defaults to 10.
+        sweeps_num (int): Number of sweeps. Defaults to 10.
-        load_dim (int, optional): Dimension number of the loaded points.
+        load_dim (int): Dimension number of the loaded points. Defaults to 5.
-            Defaults to 5.
+        use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].
-        use_dim (list[int], optional): Which dimension to use.
+        file_client_args (dict): Arguments to instantiate a FileClient.
-            Defaults to [0, 1, 2, 4].
+            See :class:`mmengine.fileio.FileClient` for details.
-        file_client_args (dict, optional): Config dict of file clients,
+            Defaults to dict(backend='disk').
-            refer to
+        pad_empty_sweeps (bool): Whether to repeat keyframe when
-            https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
-            for more details. Defaults to dict(backend='disk').
-        pad_empty_sweeps (bool, optional): Whether to repeat keyframe when
            sweeps is empty. Defaults to False.
-        remove_close (bool, optional): Whether to remove close points.
+        remove_close (bool): Whether to remove close points. Defaults to False.
-            Defaults to False.
+        test_mode (bool): If `test_mode=True`, it will not randomly sample
-        test_mode (bool, optional): If `test_mode=True`, it will not
+            sweeps but select the nearest N frames. Defaults to False.
-            randomly sample sweeps but select the nearest N frames.
-            Defaults to False.
    """
    def __init__(self,
@@ -336,11 +329,11 @@ class LoadPointsFromMultiSweeps(BaseTransform):
    def _remove_close(self,
                      points: Union[np.ndarray, BasePoints],
                      radius: float = 1.0) -> Union[np.ndarray, BasePoints]:
-        """Removes point too close within a certain radius from origin.
+        """Remove point too close within a certain radius from origin.
        Args:
            points (np.ndarray | :obj:`BasePoints`): Sweep points.
-            radius (float, optional): Radius below which points are removed.
+            radius (float): Radius below which points are removed.
                Defaults to 1.0.
        Returns:
@@ -414,7 +407,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
        results['points'] = points
        return results
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
@@ -465,7 +458,7 @@ class PointSegClassMapping(BaseTransform):
        return results
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        return repr_str
@@ -505,7 +498,7 @@ class NormalizePointsColor(BaseTransform):
        input_dict['points'] = points
        return input_dict
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f'(color_mean={self.color_mean})'
@@ -533,19 +526,15 @@ class LoadPointsFromFile(BaseTransform):
            - 'LIDAR': Points in LiDAR coordinates.
            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
            - 'CAMERA': Points in camera coordinates.
-        load_dim (int, optional): The dimension of the loaded points.
+        load_dim (int): The dimension of the loaded points. Defaults to 6.
-            Defaults to 6.
+        use_dim (list[int] | int): Which dimensions of the points to use.
-        use_dim (list[int] | int, optional): Which dimensions of the points
+            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
-            to use. Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
-        shift_height (bool, optional): Whether to use shifted height.
+        shift_height (bool): Whether to use shifted height. Defaults to False.
-            Defaults to False.
+        use_color (bool): Whether to use color features. Defaults to False.
-        use_color (bool, optional): Whether to use color features.
+        file_client_args (dict): Arguments to instantiate a FileClient.
-            Defaults to False.
+            See :class:`mmengine.fileio.FileClient` for details.
-        file_client_args (dict, optional): Config dict of file clients,
+            Defaults to dict(backend='disk').
-            refer to
-            https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
-            for more details. Defaults to dict(backend='disk').
    """
    def __init__(
@@ -638,7 +627,7 @@ class LoadPointsFromFile(BaseTransform):
        return results
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__ + '('
        repr_str += f'shift_height={self.shift_height}, '
@@ -688,7 +677,7 @@ class LoadAnnotations3D(LoadAnnotations):
    - pts_instance_mask_path (str): Path of instance mask file.
      Only when `with_mask_3d` is True.
    - pts_semantic_mask_path (str): Path of semantic mask file.
-      Only when
+      Only when `with_seg_3d` is True.
    Added Keys:
@@ -713,33 +702,25 @@ class LoadAnnotations3D(LoadAnnotations):
      Only when `with_seg_3d` is True.
    Args:
-        with_bbox_3d (bool, optional): Whether to load 3D boxes.
+        with_bbox_3d (bool): Whether to load 3D boxes. Defaults to True.
-            Defaults to True.
+        with_label_3d (bool): Whether to load 3D labels. Defaults to True.
-        with_label_3d (bool, optional): Whether to load 3D labels.
+        with_attr_label (bool): Whether to load attribute label.
-            Defaults to True.
-        with_attr_label (bool, optional): Whether to load attribute label.
-            Defaults to False.
-        with_mask_3d (bool, optional): Whether to load 3D instance masks.
-            for points. Defaults to False.
-        with_seg_3d (bool, optional): Whether to load 3D semantic masks.
-            for points. Defaults to False.
-        with_bbox (bool, optional): Whether to load 2D boxes.
-            Defaults to False.
-        with_label (bool, optional): Whether to load 2D labels.
            Defaults to False.
-        with_mask (bool, optional): Whether to load 2D instance masks.
+        with_mask_3d (bool): Whether to load 3D instance masks for points.
            Defaults to False.
-        with_seg (bool, optional): Whether to load 2D semantic masks.
+        with_seg_3d (bool): Whether to load 3D semantic masks for points.
            Defaults to False.
-        with_bbox_depth (bool, optional): Whether to load 2.5D boxes.
+        with_bbox (bool): Whether to load 2D boxes. Defaults to False.
-            Defaults to False.
+        with_label (bool): Whether to load 2D labels. Defaults to False.
-        poly2mask (bool, optional): Whether to convert polygon annotations
+        with_mask (bool): Whether to load 2D instance masks. Defaults to False.
-            to bitmasks. Defaults to True.
+        with_seg (bool): Whether to load 2D semantic masks. Defaults to False.
-        seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.
+        with_bbox_depth (bool): Whether to load 2.5D boxes. Defaults to False.
-            Defaults to int64.
+        poly2mask (bool): Whether to convert polygon annotations to bitmasks.
-        file_client_args (dict): Config dict of file clients, refer to
+            Defaults to True.
-            https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
+        seg_3d_dtype (dtype): Dtype of 3D semantic masks. Defaults to int64.
-            for more details.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
    """
    def __init__(
@@ -889,7 +870,8 @@ class LoadAnnotations3D(LoadAnnotations):
        `ignore_flag`
        Args:
-            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+            results (dict): Result dict from :obj:`mmcv.BaseDataset`.
        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
@@ -900,7 +882,7 @@ class LoadAnnotations3D(LoadAnnotations):
        """Private function to load label annotations.
        Args:
-            results (dict): Result dict from :obj :obj:``mmcv.BaseDataset``.
+            results (dict): Result dict from :obj:`mmcv.BaseDataset`.
        Returns:
            dict: The dict contains loaded label annotations.
@@ -933,7 +915,7 @@ class LoadAnnotations3D(LoadAnnotations):
        return results
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        indent_str = '    '
        repr_str = self.__class__.__name__ + '(\n'

--- a/mmdet3d/datasets/transforms/test_time_aug.py
+++ b/mmdet3d/datasets/transforms/test_time_aug.py
@@ -19,18 +19,17 @@ class MultiScaleFlipAug3D(BaseTransform):
        img_scale (tuple | list[tuple]): Images scales for resizing.
        pts_scale_ratio (float | list[float]): Points scale ratios for
            resizing.
-        flip (bool, optional): Whether apply flip augmentation.
+        flip (bool): Whether to apply flip augmentation. Defaults to False.
-            Defaults to False.
+        flip_direction (str | list[str]): Flip augmentation directions
-        flip_direction (str | list[str], optional): Flip augmentation
+            for images, options are "horizontal" and "vertical".
-            directions for images, options are "horizontal" and "vertical".
            If flip_direction is list, multiple flip augmentations will
            be applied. It has no effect when ``flip == False``.
            Defaults to 'horizontal'.
-        pcd_horizontal_flip (bool, optional): Whether to apply horizontal
+        pcd_horizontal_flip (bool): Whether to apply horizontal flip
-            flip augmentation to point cloud. Defaults to True.
+            augmentation to point cloud. Defaults to False.
            Note that it works only when 'flip' is turned on.
-        pcd_vertical_flip (bool, optional): Whether to apply vertical flip
+        pcd_vertical_flip (bool): Whether to apply vertical flip
-            augmentation to point cloud. Defaults to True.
+            augmentation to point cloud. Defaults to False.
            Note that it works only when 'flip' is turned on.
    """
@@ -112,7 +111,7 @@ class MultiScaleFlipAug3D(BaseTransform):
        return aug_data_list
-    def __repr__(self):
+    def __repr__(self) -> str:
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f'(transforms={self.transforms}, '

--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
 # Copyright (c) OpenMMLab. All rights reserved.
 import os.path as osp
-from typing import Callable, List, Optional, Union
+from typing import Callable, List, Union
 import numpy as np
@@ -24,20 +24,20 @@ class WaymoDataset(KittiDataset):
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        data_prefix (dict): data prefix for point cloud and
-            camera data dict. Default to dict(
+            camera data dict. Defaults to dict(
                                    pts='velodyne',
                                    CAM_FRONT='image_0',
                                    CAM_FRONT_RIGHT='image_1',
                                    CAM_FRONT_LEFT='image_2',
                                    CAM_SIDE_RIGHT='image_3',
                                    CAM_SIDE_LEFT='image_4')
-        pipeline (list[dict], optional): Pipeline used for data processing.
+        pipeline (list[dict]): Pipeline used for data processing.
-            Defaults to None.
+            Defaults to [].
-        modality (dict, optional): Modality to specify the sensor data used
+        modality (dict): Modality to specify the sensor data used
            as input. Defaults to dict(use_lidar=True).
-        default_cam_key (str, optional): Default camera key for lidar2img
+        default_cam_key (str): Default camera key for lidar2img
            association. Defaults to 'CAM_FRONT'.
-        box_type_3d (str, optional): Type of 3D box of this dataset.
+        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
            Defaults to 'LiDAR' in this dataset. Available options includes:
@@ -45,22 +45,23 @@ class WaymoDataset(KittiDataset):
            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool, optional): Whether to filter empty GT.
+        filter_empty_gt (bool): Whether to filter the data with empty GT.
-            Defaults to True.
+            If it's set to be True, the example with empty annotations after
-        test_mode (bool, optional): Whether the dataset is in test mode.
+            data pipeline will be dropped and a random example will be chosen
+            in `__getitem__`. Defaults to True.
+        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
-        pcd_limit_range (list[float], optional): The range of point cloud
+        pcd_limit_range (list[float]): The range of point cloud
            used to filter invalid predicted boxes.
            Defaults to [0, -40, -3, 70.4, 40, 0.0].
-        cam_sync_instances (bool, optional): If use the camera sync label
+        cam_sync_instances (bool): If use the camera sync label
            supported from waymo version 1.3.1. Defaults to False.
-        load_interval (int, optional): load frame interval.
+        load_interval (int): load frame interval. Defaults to 1.
-            Defaults to 1.
+        task (str): task for 3D detection (lidar, mono3d).
-        task (str, optional): task for 3D detection (lidar, mono3d).
            lidar: take all the ground truth in the frame.
            mono3d: take the groundtruth that can be seen in the cam.
-            Defaults to 'lidar'.
+            Defaults to 'lidar_det'.
-        max_sweeps (int, optional): max sweep for each frame. Defaults to 0.
+        max_sweeps (int): max sweep for each frame. Defaults to 0.
    """
    METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')}
@@ -75,17 +76,17 @@ class WaymoDataset(KittiDataset):
                     CAM_SIDE_RIGHT='image_3',
                     CAM_SIDE_LEFT='image_4'),
                 pipeline: List[Union[dict, Callable]] = [],
-                 modality: Optional[dict] = dict(use_lidar=True),
+                 modality: dict = dict(use_lidar=True),
                 default_cam_key: str = 'CAM_FRONT',
                 box_type_3d: str = 'LiDAR',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
-                 cam_sync_instances=False,
+                 cam_sync_instances: bool = False,
-                 load_interval=1,
+                 load_interval: int = 1,
-                 task='lidar_det',
+                 task: str = 'lidar_det',
-                 max_sweeps=0,
+                 max_sweeps: int = 0,
-                 **kwargs):
+                 **kwargs) -> None:
        self.load_interval = load_interval
        # set loading mode for different task settings
        self.cam_sync_instances = cam_sync_instances
@@ -111,7 +112,7 @@ class WaymoDataset(KittiDataset):
            **kwargs)
    def parse_ann_info(self, info: dict) -> dict:
-        """Get annotation info according to the given index.
+        """Process the `instances` in data info to `ann_info`.
        Args:
            info (dict): Data information of single data sample.

--- a/mmdet3d/models/decode_heads/decode_head.py
+++ b/mmdet3d/models/decode_heads/decode_head.py
@@ -41,19 +41,20 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
    Args:
        channels (int): Channels after modules, before conv_seg.
        num_classes (int): Number of classes.
-        dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5.
+        dropout_ratio (float): Ratio of dropout layer. Defaults to 0.5.
-        conv_cfg (dict, optional): Config of conv layers.
+        conv_cfg (dict): Config of conv layers.
-            Default: dict(type='Conv1d').
+            Defaults to dict(type='Conv1d').
-        norm_cfg (dict, optional): Config of norm layers.
+        norm_cfg (dict): Config of norm layers.
-            Default: dict(type='BN1d').
+            Defaults to dict(type='BN1d').
-        act_cfg (dict, optional): Config of activation layers.
+        act_cfg (dict): Config of activation layers.
-            Default: dict(type='ReLU').
+            Defaults to dict(type='ReLU').
-        loss_decode (dict, optional): Config of decode loss.
+        loss_decode (dict): Config of decode loss.
-            Default: dict(type='CrossEntropyLoss').
+            Defaults to dict(type='CrossEntropyLoss').
-        ignore_index (int, optional): The label index to be ignored.
+        ignore_index (int): The label index to be ignored.
            When using masked BCE loss, ignore_index should be set to None.
-            Default: 255.
+            Defaults to 255.
        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Defaults to None.
    """
    def __init__(self,
@@ -105,8 +106,8 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
        output = self.conv_seg(feat)
        return output
-    def loss(self, inputs: List[Tensor],
+    def loss(self, inputs: List[Tensor], batch_data_samples: SampleList,
-             batch_data_samples: SampleList) -> dict:
+             train_cfg: ConfigType) -> dict:
        """Forward function for training.
        Args:

--- a/mmdet3d/models/segmentors/base.py
+++ b/mmdet3d/models/segmentors/base.py
@@ -140,7 +140,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta):
    def postprocess_result(self, seg_pred_list: List[dict],
                           batch_img_metas: List[dict]) -> list:
-        """ Convert results list to `Det3DDataSample`.
+        """Convert results list to `Det3DDataSample`.
        Args:
            seg_pred_list (List[dict]): List of segmentation results,
                seg_logits from model of each input point clouds sample.
@@ -157,7 +158,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta):
        for i in range(len(seg_pred_list)):
            img_meta = batch_img_metas[i]
            seg_pred = seg_pred_list[i]
-            prediction = Det3DDataSample(**{'metainfo': img_meta})
+            prediction = Det3DDataSample(**{'metainfo': img_meta.metainfo})
+            prediction.set_data({'eval_ann_info': img_meta.eval_ann_info})
            prediction.set_data(
                {'pred_pts_seg': PointData(**{'pts_semantic_mask': seg_pred})})
            predictions.append(prediction)

--- a/mmdet3d/models/segmentors/encoder_decoder.py
+++ b/mmdet3d/models/segmentors/encoder_decoder.py
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List
+from typing import List, Tuple
 import numpy as np
 import torch
@@ -65,10 +65,10 @@ class EncoderDecoder3D(Base3DSegmentor):
            loss. Defaults to None.
        train_cfg (OptConfigType): The config for training. Defaults to None.
        test_cfg (OptConfigType): The config for testing. Defaults to None.
-        data_preprocessor (dict, optional): The pre-process config of
+        data_preprocessor (OptConfigType): The pre-process config of
-            :class:`BaseDataPreprocessor`.
+            :class:`BaseDataPreprocessor`. Defaults to None.
-        init_cfg (dict, optional): The weight initialized config for
+        init_cfg (OptMultiConfig): The weight initialized config for
-            :class:`BaseModule`.
+            :class:`BaseModule`. Defaults to None.
    """  # noqa: E501
    def __init__(self,
@@ -80,7 +80,7 @@ class EncoderDecoder3D(Base3DSegmentor):
                 train_cfg: OptConfigType = None,
                 test_cfg: OptConfigType = None,
                 data_preprocessor: OptConfigType = None,
-                 init_cfg: OptMultiConfig = None):
+                 init_cfg: OptMultiConfig = None) -> None:
        super(EncoderDecoder3D, self).__init__(
            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
        self.backbone = MODELS.build(backbone)
@@ -122,15 +122,15 @@ class EncoderDecoder3D(Base3DSegmentor):
            else:
                self.loss_regularization = MODELS.build(loss_regularization)
-    def extract_feat(self, batch_inputs) -> List[Tensor]:
+    def extract_feat(self, batch_inputs: Tensor) -> Tensor:
        """Extract features from points."""
        x = self.backbone(batch_inputs)
        if self.with_neck:
            x = self.neck(x)
        return x
-    def encode_decode(self, batch_inputs: torch.Tensor,
+    def encode_decode(self, batch_inputs: Tensor,
-                      batch_input_metas: List[dict]) -> List[Tensor]:
+                      batch_input_metas: List[dict]) -> Tensor:
        """Encode points with backbone and decode into a semantic segmentation
        map of the same size as input.
@@ -178,7 +178,7 @@ class EncoderDecoder3D(Base3DSegmentor):
        return losses
-    def _loss_regularization_forward_train(self):
+    def _loss_regularization_forward_train(self) -> dict:
        """Calculate regularization loss for model weight in training."""
        losses = dict()
        if isinstance(self.loss_regularization, nn.ModuleList):
@@ -213,7 +213,8 @@ class EncoderDecoder3D(Base3DSegmentor):
        """
        # extract features using backbone
-        x = self.extract_feat(batch_inputs_dict)
+        points = torch.stack(batch_inputs_dict['points'])
+        x = self.extract_feat(points)
        losses = dict()
@@ -236,7 +237,7 @@ class EncoderDecoder3D(Base3DSegmentor):
                          patch_center: Tensor,
                          coord_max: Tensor,
                          feats: Tensor,
-                          use_normalized_coord: bool = False):
+                          use_normalized_coord: bool = False) -> Tensor:
        """Generating model input.
        Generate input by subtracting patch center and adding additional
@@ -273,7 +274,7 @@ class EncoderDecoder3D(Base3DSegmentor):
                                  block_size: float,
                                  sample_rate: float = 0.5,
                                  use_normalized_coord: bool = False,
-                                  eps: float = 1e-3):
+                                  eps: float = 1e-3) -> Tuple[Tensor, Tensor]:
        """Sampling points in a sliding window fashion.
        First sample patches to cover all the input points.
@@ -291,7 +292,7 @@ class EncoderDecoder3D(Base3DSegmentor):
                points coverage. Defaults to 1e-3.
        Returns:
-            np.ndarray | np.ndarray:
+            tuple:
                - patch_points (torch.Tensor): Points of different patches of
                  shape [K, N, 3+C].
@@ -372,7 +373,7 @@ class EncoderDecoder3D(Base3DSegmentor):
        return patch_points, patch_idxs
    def slide_inference(self, point: Tensor, img_meta: List[dict],
-                        rescale: bool):
+                        rescale: bool) -> Tensor:
        """Inference by sliding-window with overlap.
        Args:
@@ -417,14 +418,14 @@ class EncoderDecoder3D(Base3DSegmentor):
        return preds.transpose(0, 1)  # to [num_classes, K*N]
    def whole_inference(self, points: Tensor, input_metas: List[dict],
-                        rescale: bool):
+                        rescale: bool) -> Tensor:
        """Inference with full scene (one forward pass without sliding)."""
        seg_logit = self.encode_decode(points, input_metas)
        # TODO: if rescale and voxelization segmentor
        return seg_logit
    def inference(self, points: Tensor, input_metas: List[dict],
-                  rescale: bool):
+                  rescale: bool) -> Tensor:
        """Inference with slide/whole style.
        Args:
@@ -489,7 +490,7 @@ class EncoderDecoder3D(Base3DSegmentor):
            seg_map = seg_map.cpu()
            seg_pred_list.append(seg_map)
-        return self.postprocess_result(seg_pred_list, batch_input_metas)
+        return self.postprocess_result(seg_pred_list, batch_data_samples)
    def _forward(self,
                 batch_inputs_dict: dict,
@@ -510,7 +511,8 @@ class EncoderDecoder3D(Base3DSegmentor):
        Returns:
            Tensor: Forward output of model without any post-processes.
        """
-        x = self.extract_feat(batch_inputs_dict)
+        points = torch.stack(batch_inputs_dict['points'])
+        x = self.extract_feat(points)
        return self.decode_head.forward(x)
    def aug_test(self, batch_inputs, batch_img_metas):

--- a/mmdet3d/version.py
+++ b/mmdet3d/version.py
@@ -5,6 +5,15 @@ short_version = __version__
 def parse_version_info(version_str):
+    """Parse a version string into a tuple.
+    Args:
+        version_str (str): The version string.
+    Returns:
+        tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
+            (1, 3, 0), and "2.0.0rc1" is parsed into (2, 0, 0, 'rc1').
+    """
    version_info = []
    for x in version_str.split('.'):
        if x.isdigit():

--- a/requirements/mminstall.txt
+++ b/requirements/mminstall.txt
-mmcv-full>=2.0.0rc0,<2.1.0
+mmcv>=2.0.0rc0,<2.1.0
 mmdet>=3.0.0rc0,<3.1.0
+mmengine>=0.1.0,<1.0.0