Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
ac289b35
Unverified
Commit
ac289b35
authored
Oct 19, 2023
by
Tai-Wang
Committed by
GitHub
Oct 19, 2023
Browse files
Bump version to v1.3.0
Bump version to v1.3.0
parents
b0e8ece9
12b595ca
Changes
69
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
644 additions
and
206 deletions
+644
-206
docs/zh_cn/notes/faq.md
docs/zh_cn/notes/faq.md
+5
-6
mmdet3d/__init__.py
mmdet3d/__init__.py
+3
-3
mmdet3d/apis/inference.py
mmdet3d/apis/inference.py
+2
-1
mmdet3d/apis/inferencers/base_3d_inferencer.py
mmdet3d/apis/inferencers/base_3d_inferencer.py
+135
-101
mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
+74
-19
mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
+31
-17
mmdet3d/apis/inferencers/mono_det3d_inferencer.py
mmdet3d/apis/inferencers/mono_det3d_inferencer.py
+90
-17
mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
+102
-20
mmdet3d/configs/_base_/__init__.py
mmdet3d/configs/_base_/__init__.py
+0
-1
mmdet3d/configs/_base_/datasets/__init__.py
mmdet3d/configs/_base_/datasets/__init__.py
+0
-1
mmdet3d/configs/_base_/models/__init__.py
mmdet3d/configs/_base_/models/__init__.py
+0
-1
mmdet3d/configs/_base_/models/cylinder3d.py
mmdet3d/configs/_base_/models/cylinder3d.py
+49
-0
mmdet3d/configs/_base_/schedules/__init__.py
mmdet3d/configs/_base_/schedules/__init__.py
+0
-1
mmdet3d/configs/centerpoint/__init__.py
mmdet3d/configs/centerpoint/__init__.py
+0
-1
mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...terpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+8
-6
mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...nterpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+8
-6
mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
...3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
+43
-0
mmdet3d/configs/cylinder3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
...der3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
+88
-0
mmdet3d/configs/minkunet/__init__.py
mmdet3d/configs/minkunet/__init__.py
+0
-1
mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
..._w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
+6
-4
No files found.
docs/zh_cn/notes/faq.md
View file @
ac289b35
...
@@ -10,12 +10,11 @@
...
@@ -10,12 +10,11 @@
| MMDetection3D 版本 | MMEngine 版本 | MMCV 版本 | MMDetection 版本 |
| MMDetection3D 版本 | MMEngine 版本 | MMCV 版本 | MMDetection 版本 |
| ------------------ | :----------------------: | :---------------------: | :----------------------: |
| ------------------ | :----------------------: | :---------------------: | :----------------------: |
| dev-1.x | mmengine>=0.7.1,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.1.0 | mmdet>=3.0.0,
\<
3.1.0 |
| dev-1.x | mmengine>=0.8.0,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.2.0 | mmdet>=3.0.0rc5,
\<
3.3.0 |
| main | mmengine>=0.7.1,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.1.0 | mmdet>=3.0.0,
\<
3.1.0 |
| main | mmengine>=0.8.0,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.2.0 | mmdet>=3.0.0rc5,
\<
3.3.0 |
| v1.1.0rc3 | mmengine>=0.1.0,
\<
1.0.0 | mmcv>=2.0.0rc3,
\<
2.1.0 | mmdet>=3.0.0rc0,
\<
3.1.0 |
| v1.3.0 | mmengine>=0.8.0,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.2.0 | mmdet>=3.0.0rc5,
\<
3.3.0 |
| v1.1.0rc2 | mmengine>=0.1.0,
\<
1.0.0 | mmcv>=2.0.0rc3,
\<
2.1.0 | mmdet>=3.0.0rc0,
\<
3.1.0 |
| v1.2.0 | mmengine>=0.8.0,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.1.0 | mmdet>=3.0.0,
\<
3.2.0 |
| v1.1.0rc1 | mmengine>=0.1.0,
\<
1.0.0 | mmcv>=2.0.0rc0,
\<
2.1.0 | mmdet>=3.0.0rc0,
\<
3.1.0 |
| v1.1.1 | mmengine>=0.7.1,
\<
1.0.0 | mmcv>=2.0.0rc4,
\<
2.1.0 | mmdet>=3.0.0,
\<
3.1.0 |
| v1.1.0rc0 | mmengine>=0.1.0,
\<
1.0.0 | mmcv>=2.0.0rc0,
\<
2.1.0 | mmdet>=3.0.0rc0,
\<
3.1.0 |
**注意**
:如果你想安装 mmdet3d-v1.0.0rcx,可以在
[
此处
](
https://mmdetection3d.readthedocs.io/en/latest/faq.html#mmcv-mmdet-mmdet3d-installation
)
找到 MMDetection,MMSegmentation 和 MMCV 的兼容版本。请选择正确版本的 MMCV、MMDetection 和 MMSegmentation 以避免安装问题。
**注意**
:如果你想安装 mmdet3d-v1.0.0rcx,可以在
[
此处
](
https://mmdetection3d.readthedocs.io/en/latest/faq.html#mmcv-mmdet-mmdet3d-installation
)
找到 MMDetection,MMSegmentation 和 MMCV 的兼容版本。请选择正确版本的 MMCV、MMDetection 和 MMSegmentation 以避免安装问题。
...
...
mmdet3d/__init__.py
View file @
ac289b35
...
@@ -7,15 +7,15 @@ from mmengine.utils import digit_version
...
@@ -7,15 +7,15 @@ from mmengine.utils import digit_version
from
.version
import
__version__
,
version_info
from
.version
import
__version__
,
version_info
mmcv_minimum_version
=
'2.0.0rc4'
mmcv_minimum_version
=
'2.0.0rc4'
mmcv_maximum_version
=
'2.
1
.0'
mmcv_maximum_version
=
'2.
2
.0'
mmcv_version
=
digit_version
(
mmcv
.
__version__
)
mmcv_version
=
digit_version
(
mmcv
.
__version__
)
mmengine_minimum_version
=
'0.8.0'
mmengine_minimum_version
=
'0.8.0'
mmengine_maximum_version
=
'1.0.0'
mmengine_maximum_version
=
'1.0.0'
mmengine_version
=
digit_version
(
mmengine
.
__version__
)
mmengine_version
=
digit_version
(
mmengine
.
__version__
)
mmdet_minimum_version
=
'3.0.0'
mmdet_minimum_version
=
'3.0.0
rc5
'
mmdet_maximum_version
=
'3.
2
.0'
mmdet_maximum_version
=
'3.
3
.0'
mmdet_version
=
digit_version
(
mmdet
.
__version__
)
mmdet_version
=
digit_version
(
mmdet
.
__version__
)
assert
(
mmcv_version
>=
digit_version
(
mmcv_minimum_version
)
assert
(
mmcv_version
>=
digit_version
(
mmcv_minimum_version
)
...
...
mmdet3d/apis/inference.py
View file @
ac289b35
...
@@ -392,7 +392,8 @@ def inference_segmentor(model: nn.Module, pcds: PointsType):
...
@@ -392,7 +392,8 @@ def inference_segmentor(model: nn.Module, pcds: PointsType):
new_test_pipeline
=
[]
new_test_pipeline
=
[]
for
pipeline
in
test_pipeline
:
for
pipeline
in
test_pipeline
:
if
pipeline
[
'type'
]
!=
'LoadAnnotations3D'
:
if
pipeline
[
'type'
]
!=
'LoadAnnotations3D'
and
pipeline
[
'type'
]
!=
'PointSegClassMapping'
:
new_test_pipeline
.
append
(
pipeline
)
new_test_pipeline
.
append
(
pipeline
)
test_pipeline
=
Compose
(
new_test_pipeline
)
test_pipeline
=
Compose
(
new_test_pipeline
)
...
...
mmdet3d/apis/inferencers/base_3d_inferencer.py
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import
logging
import
os.path
as
osp
from
copy
import
deepcopy
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
import
mmengine
import
numpy
as
np
import
numpy
as
np
import
torch.nn
as
nn
import
torch.nn
as
nn
from
mmengine.fileio
import
(
get_file_backend
,
isdir
,
join_path
,
from
mmengine
import
dump
,
print_log
list_dir_or_file
)
from
mmengine.infer.infer
import
BaseInferencer
,
ModelType
from
mmengine.infer.infer
import
BaseInferencer
,
ModelType
from
mmengine.model.utils
import
revert_sync_batchnorm
from
mmengine.registry
import
init_default_scope
from
mmengine.registry
import
init_default_scope
from
mmengine.runner
import
load_checkpoint
from
mmengine.runner
import
load_checkpoint
from
mmengine.structures
import
InstanceData
from
mmengine.structures
import
InstanceData
from
mmengine.visualization
import
Visualizer
from
mmengine.visualization
import
Visualizer
from
rich.progress
import
track
from
mmdet3d.registry
import
MODELS
from
mmdet3d.registry
import
DATASETS
,
MODELS
from
mmdet3d.structures
import
Box3DMode
,
Det3DDataSample
from
mmdet3d.utils
import
ConfigType
from
mmdet3d.utils
import
ConfigType
InstanceList
=
List
[
InstanceData
]
InstanceList
=
List
[
InstanceData
]
...
@@ -44,14 +48,14 @@ class Base3DInferencer(BaseInferencer):
...
@@ -44,14 +48,14 @@ class Base3DInferencer(BaseInferencer):
priority is palette -> config -> checkpoint. Defaults to 'none'.
priority is palette -> config -> checkpoint. Defaults to 'none'.
"""
"""
preprocess_kwargs
:
set
=
set
()
preprocess_kwargs
:
set
=
{
'cam_type'
}
forward_kwargs
:
set
=
set
()
forward_kwargs
:
set
=
set
()
visualize_kwargs
:
set
=
{
visualize_kwargs
:
set
=
{
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'img_out_dir'
'img_out_dir'
,
'no_save_vis'
,
'cam_type_dir'
}
}
postprocess_kwargs
:
set
=
{
postprocess_kwargs
:
set
=
{
'print_result'
,
'pred_out_
file
'
,
'return_datasample'
'print_result'
,
'pred_out_
dir
'
,
'return_datasample'
,
'no_save_pred'
}
}
def
__init__
(
self
,
def
__init__
(
self
,
...
@@ -60,10 +64,14 @@ class Base3DInferencer(BaseInferencer):
...
@@ -60,10 +64,14 @@ class Base3DInferencer(BaseInferencer):
device
:
Optional
[
str
]
=
None
,
device
:
Optional
[
str
]
=
None
,
scope
:
str
=
'mmdet3d'
,
scope
:
str
=
'mmdet3d'
,
palette
:
str
=
'none'
)
->
None
:
palette
:
str
=
'none'
)
->
None
:
# A global counter tracking the number of frames processed, for
# naming of the output results
self
.
num_predicted_frames
=
0
self
.
palette
=
palette
self
.
palette
=
palette
init_default_scope
(
scope
)
init_default_scope
(
scope
)
super
().
__init__
(
super
().
__init__
(
model
=
model
,
weights
=
weights
,
device
=
device
,
scope
=
scope
)
model
=
model
,
weights
=
weights
,
device
=
device
,
scope
=
scope
)
self
.
model
=
revert_sync_batchnorm
(
self
.
model
)
def
_convert_syncbn
(
self
,
cfg
:
ConfigType
):
def
_convert_syncbn
(
self
,
cfg
:
ConfigType
):
"""Convert config's naiveSyncBN to BN.
"""Convert config's naiveSyncBN to BN.
...
@@ -108,56 +116,19 @@ class Base3DInferencer(BaseInferencer):
...
@@ -108,56 +116,19 @@ class Base3DInferencer(BaseInferencer):
if
'PALETTE'
in
checkpoint
.
get
(
'meta'
,
{}):
# 3D Segmentor
if
'PALETTE'
in
checkpoint
.
get
(
'meta'
,
{}):
# 3D Segmentor
model
.
dataset_meta
[
'palette'
]
=
checkpoint
[
'meta'
][
'PALETTE'
]
model
.
dataset_meta
[
'palette'
]
=
checkpoint
[
'meta'
][
'PALETTE'
]
test_dataset_cfg
=
deepcopy
(
cfg
.
test_dataloader
.
dataset
)
# lazy init. We only need the metainfo.
test_dataset_cfg
[
'lazy_init'
]
=
True
metainfo
=
DATASETS
.
build
(
test_dataset_cfg
).
metainfo
cfg_palette
=
metainfo
.
get
(
'palette'
,
None
)
if
cfg_palette
is
not
None
:
model
.
dataset_meta
[
'palette'
]
=
cfg_palette
model
.
cfg
=
cfg
# save the config in the model for convenience
model
.
cfg
=
cfg
# save the config in the model for convenience
model
.
to
(
device
)
model
.
to
(
device
)
model
.
eval
()
model
.
eval
()
return
model
return
model
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
],
modality_key
:
Union
[
str
,
List
[
str
]]
=
'points'
)
->
list
:
"""Preprocess the inputs to a list.
Preprocess inputs to a list according to its type:
- list or tuple: return inputs
- dict: the value of key 'points'/`img` is
- Directory path: return all files in the directory
- other cases: return a list containing the string. The string
could be a path to file, a url or other types of string according
to the task.
Args:
inputs (Union[dict, list]): Inputs for the inferencer.
modality_key (Union[str, List[str]]): The key of the modality.
Defaults to 'points'.
Returns:
list: List of input for the :meth:`preprocess`.
"""
if
isinstance
(
modality_key
,
str
):
modality_key
=
[
modality_key
]
assert
set
(
modality_key
).
issubset
({
'points'
,
'img'
})
for
key
in
modality_key
:
if
isinstance
(
inputs
,
dict
)
and
isinstance
(
inputs
[
key
],
str
):
img
=
inputs
[
key
]
backend
=
get_file_backend
(
img
)
if
hasattr
(
backend
,
'isdir'
)
and
isdir
(
img
):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
filename_list
=
list_dir_or_file
(
img
,
list_dir
=
False
)
inputs
=
[{
f
'
{
key
}
'
:
join_path
(
img
,
filename
)
}
for
filename
in
filename_list
]
if
not
isinstance
(
inputs
,
(
list
,
tuple
)):
inputs
=
[
inputs
]
return
list
(
inputs
)
def
_get_transform_idx
(
self
,
pipeline_cfg
:
ConfigType
,
name
:
str
)
->
int
:
def
_get_transform_idx
(
self
,
pipeline_cfg
:
ConfigType
,
name
:
str
)
->
int
:
"""Returns the index of the transform in a pipeline.
"""Returns the index of the transform in a pipeline.
...
@@ -173,64 +144,81 @@ class Base3DInferencer(BaseInferencer):
...
@@ -173,64 +144,81 @@ class Base3DInferencer(BaseInferencer):
visualizer
.
dataset_meta
=
self
.
model
.
dataset_meta
visualizer
.
dataset_meta
=
self
.
model
.
dataset_meta
return
visualizer
return
visualizer
def
_dispatch_kwargs
(
self
,
out_dir
:
str
=
''
,
cam_type
:
str
=
''
,
**
kwargs
)
->
Tuple
[
Dict
,
Dict
,
Dict
,
Dict
]:
"""Dispatch kwargs to preprocess(), forward(), visualize() and
postprocess() according to the actual demands.
Args:
out_dir (str): Dir to save the inference results.
cam_type (str): Camera type. Defaults to ''.
**kwargs (dict): Key words arguments passed to :meth:`preprocess`,
:meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
Each key in kwargs should be in the corresponding set of
``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
and ``postprocess_kwargs``.
Returns:
Tuple[Dict, Dict, Dict, Dict]: kwargs passed to preprocess,
forward, visualize and postprocess respectively.
"""
kwargs
[
'img_out_dir'
]
=
out_dir
kwargs
[
'pred_out_dir'
]
=
out_dir
if
cam_type
!=
''
:
kwargs
[
'cam_type_dir'
]
=
cam_type
return
super
().
_dispatch_kwargs
(
**
kwargs
)
def
__call__
(
self
,
def
__call__
(
self
,
inputs
:
InputsType
,
inputs
:
InputsType
,
return_datasamples
:
bool
=
False
,
batch_size
:
int
=
1
,
batch_size
:
int
=
1
,
return_vis
:
bool
=
False
,
return_datasamples
:
bool
=
False
,
show
:
bool
=
False
,
**
kwargs
)
->
Optional
[
dict
]:
wait_time
:
int
=
0
,
draw_pred
:
bool
=
True
,
pred_score_thr
:
float
=
0.3
,
img_out_dir
:
str
=
''
,
print_result
:
bool
=
False
,
pred_out_file
:
str
=
''
,
**
kwargs
)
->
dict
:
"""Call the inferencer.
"""Call the inferencer.
Args:
Args:
inputs (InputsType): Inputs for the inferencer.
inputs (InputsType): Inputs for the inferencer.
batch_size (int): Batch size. Defaults to 1.
return_datasamples (bool): Whether to return results as
return_datasamples (bool): Whether to return results as
:obj:`BaseDataElement`. Defaults to False.
:obj:`BaseDataElement`. Defaults to False.
batch_size (int): Inference batch size. Defaults to 1.
**kwargs: Key words arguments passed to :meth:`preprocess`,
return_vis (bool): Whether to return the visualization result.
Defaults to False.
show (bool): Whether to display the visualization results in a
popup window. Defaults to False.
wait_time (float): The interval of show (s). Defaults to 0.
draw_pred (bool): Whether to draw predicted bounding boxes.
Defaults to True.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
img_out_dir (str): Output directory of visualization results.
If left as empty, no file will be saved. Defaults to ''.
print_result (bool): Whether to print the inference result w/o
visualization to the console. Defaults to False.
pred_out_file (str): File to save the inference results w/o
visualization. If left as empty, no file will be saved.
Defaults to ''.
**kwargs: Other keyword arguments passed to :meth:`preprocess`,
:meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
:meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
Each key in kwargs should be in the corresponding set of
Each key in kwargs should be in the corresponding set of
``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
and ``postprocess_kwargs``.
and ``postprocess_kwargs``.
Returns:
Returns:
dict: Inference and visualization results.
dict: Inference and visualization results.
"""
"""
return
super
().
__call__
(
inputs
,
(
return_datasamples
,
preprocess_kwargs
,
batch_size
,
forward_kwargs
,
return_vis
=
return_vis
,
visualize_kwargs
,
show
=
show
,
postprocess_kwargs
,
wait_time
=
wait_time
,
)
=
self
.
_dispatch_kwargs
(
**
kwargs
)
draw_pred
=
draw_pred
,
pred_score_thr
=
pred_score_thr
,
cam_type
=
preprocess_kwargs
.
pop
(
'cam_type'
,
'CAM2'
)
img_out_dir
=
img_out_dir
,
ori_inputs
=
self
.
_inputs_to_list
(
inputs
,
cam_type
=
cam_type
)
print_result
=
print_result
,
inputs
=
self
.
preprocess
(
pred_out_file
=
pred_out_file
,
ori_inputs
,
batch_size
=
batch_size
,
**
preprocess_kwargs
)
**
kwargs
)
preds
=
[]
results_dict
=
{
'predictions'
:
[],
'visualization'
:
[]}
for
data
in
(
track
(
inputs
,
description
=
'Inference'
)
if
self
.
show_progress
else
inputs
):
preds
.
extend
(
self
.
forward
(
data
,
**
forward_kwargs
))
visualization
=
self
.
visualize
(
ori_inputs
,
preds
,
**
visualize_kwargs
)
results
=
self
.
postprocess
(
preds
,
visualization
,
return_datasamples
,
**
postprocess_kwargs
)
results_dict
[
'predictions'
].
extend
(
results
[
'predictions'
])
if
results
[
'visualization'
]
is
not
None
:
results_dict
[
'visualization'
].
extend
(
results
[
'visualization'
])
return
results_dict
def
postprocess
(
def
postprocess
(
self
,
self
,
...
@@ -238,7 +226,8 @@ class Base3DInferencer(BaseInferencer):
...
@@ -238,7 +226,8 @@ class Base3DInferencer(BaseInferencer):
visualization
:
Optional
[
List
[
np
.
ndarray
]]
=
None
,
visualization
:
Optional
[
List
[
np
.
ndarray
]]
=
None
,
return_datasample
:
bool
=
False
,
return_datasample
:
bool
=
False
,
print_result
:
bool
=
False
,
print_result
:
bool
=
False
,
pred_out_file
:
str
=
''
,
no_save_pred
:
bool
=
False
,
pred_out_dir
:
str
=
''
,
)
->
Union
[
ResType
,
Tuple
[
ResType
,
np
.
ndarray
]]:
)
->
Union
[
ResType
,
Tuple
[
ResType
,
np
.
ndarray
]]:
"""Process the predictions and visualization results from ``forward``
"""Process the predictions and visualization results from ``forward``
and ``visualize``.
and ``visualize``.
...
@@ -258,7 +247,7 @@ class Base3DInferencer(BaseInferencer):
...
@@ -258,7 +247,7 @@ class Base3DInferencer(BaseInferencer):
Defaults to False.
Defaults to False.
print_result (bool): Whether to print the inference result w/o
print_result (bool): Whether to print the inference result w/o
visualization to the console. Defaults to False.
visualization to the console. Defaults to False.
pred_out_
file
(str):
File
to save the inference results w/o
pred_out_
dir
(str):
Directory
to save the inference results w/o
visualization. If left as empty, no file will be saved.
visualization. If left as empty, no file will be saved.
Defaults to ''.
Defaults to ''.
...
@@ -273,35 +262,56 @@ class Base3DInferencer(BaseInferencer):
...
@@ -273,35 +262,56 @@ class Base3DInferencer(BaseInferencer):
json-serializable dict containing only basic data elements such
json-serializable dict containing only basic data elements such
as strings and numbers.
as strings and numbers.
"""
"""
if
no_save_pred
is
True
:
pred_out_dir
=
''
result_dict
=
{}
result_dict
=
{}
results
=
preds
results
=
preds
if
not
return_datasample
:
if
not
return_datasample
:
results
=
[]
results
=
[]
for
pred
in
preds
:
for
pred
in
preds
:
result
=
self
.
pred2dict
(
pred
)
result
=
self
.
pred2dict
(
pred
,
pred_out_dir
)
results
.
append
(
result
)
results
.
append
(
result
)
elif
pred_out_dir
!=
''
:
print_log
(
'Currently does not support saving datasample '
'when return_datasample is set to True. '
'Prediction results are not saved!'
,
level
=
logging
.
WARNING
)
# Add img to the results after printing and dumping
result_dict
[
'predictions'
]
=
results
result_dict
[
'predictions'
]
=
results
if
print_result
:
if
print_result
:
print
(
result_dict
)
print
(
result_dict
)
if
pred_out_file
!=
''
:
mmengine
.
dump
(
result_dict
,
pred_out_file
)
result_dict
[
'visualization'
]
=
visualization
result_dict
[
'visualization'
]
=
visualization
return
result_dict
return
result_dict
def
pred2dict
(
self
,
data_sample
:
InstanceData
)
->
Dict
:
# TODO: The data format and fields saved in json need further discussion.
# Maybe should include model name, timestamp, filename, image info etc.
def
pred2dict
(
self
,
data_sample
:
Det3DDataSample
,
pred_out_dir
:
str
=
''
)
->
Dict
:
"""Extract elements necessary to represent a prediction into a
"""Extract elements necessary to represent a prediction into a
dictionary.
dictionary.
It's better to contain only basic data elements such as strings and
It's better to contain only basic data elements such as strings and
numbers in order to guarantee it's json-serializable.
numbers in order to guarantee it's json-serializable.
Args:
data_sample (:obj:`DetDataSample`): Predictions of the model.
pred_out_dir: Dir to save the inference results w/o
visualization. If left as empty, no file will be saved.
Defaults to ''.
Returns:
dict: Prediction results.
"""
"""
result
=
{}
result
=
{}
if
'pred_instances_3d'
in
data_sample
:
if
'pred_instances_3d'
in
data_sample
:
pred_instances_3d
=
data_sample
.
pred_instances_3d
.
numpy
()
pred_instances_3d
=
data_sample
.
pred_instances_3d
.
numpy
()
result
=
{
result
=
{
'bboxes_3d'
:
pred_instances_3d
.
bboxes_3d
.
tensor
.
cpu
().
tolist
(),
'labels_3d'
:
pred_instances_3d
.
labels_3d
.
tolist
(),
'labels_3d'
:
pred_instances_3d
.
labels_3d
.
tolist
(),
'scores_3d'
:
pred_instances_3d
.
scores_3d
.
tolist
()
'scores_3d'
:
pred_instances_3d
.
scores_3d
.
tolist
(),
'bboxes_3d'
:
pred_instances_3d
.
bboxes_3d
.
tensor
.
cpu
().
tolist
()
}
}
if
'pred_pts_seg'
in
data_sample
:
if
'pred_pts_seg'
in
data_sample
:
...
@@ -309,4 +319,28 @@ class Base3DInferencer(BaseInferencer):
...
@@ -309,4 +319,28 @@ class Base3DInferencer(BaseInferencer):
result
[
'pts_semantic_mask'
]
=
\
result
[
'pts_semantic_mask'
]
=
\
pred_pts_seg
.
pts_semantic_mask
.
tolist
()
pred_pts_seg
.
pts_semantic_mask
.
tolist
()
if
data_sample
.
box_mode_3d
==
Box3DMode
.
LIDAR
:
result
[
'box_type_3d'
]
=
'LiDAR'
elif
data_sample
.
box_mode_3d
==
Box3DMode
.
CAM
:
result
[
'box_type_3d'
]
=
'Camera'
elif
data_sample
.
box_mode_3d
==
Box3DMode
.
DEPTH
:
result
[
'box_type_3d'
]
=
'Depth'
if
pred_out_dir
!=
''
:
if
'lidar_path'
in
data_sample
:
lidar_path
=
osp
.
basename
(
data_sample
.
lidar_path
)
lidar_path
=
osp
.
splitext
(
lidar_path
)[
0
]
out_json_path
=
osp
.
join
(
pred_out_dir
,
'preds'
,
lidar_path
+
'.json'
)
elif
'img_path'
in
data_sample
:
img_path
=
osp
.
basename
(
data_sample
.
img_path
)
img_path
=
osp
.
splitext
(
img_path
)[
0
]
out_json_path
=
osp
.
join
(
pred_out_dir
,
'preds'
,
img_path
+
'.json'
)
else
:
out_json_path
=
osp
.
join
(
pred_out_dir
,
'preds'
,
f
'
{
str
(
self
.
num_visualized_imgs
).
zfill
(
8
)
}
.json'
)
dump
(
result
,
out_json_path
)
return
result
return
result
mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
View file @
ac289b35
...
@@ -4,11 +4,16 @@ from typing import Dict, List, Optional, Sequence, Union
...
@@ -4,11 +4,16 @@ from typing import Dict, List, Optional, Sequence, Union
import
mmengine
import
mmengine
import
numpy
as
np
import
numpy
as
np
import
torch
from
mmengine.dataset
import
Compose
from
mmengine.dataset
import
Compose
from
mmengine.fileio
import
(
get_file_backend
,
isdir
,
join_path
,
list_dir_or_file
)
from
mmengine.infer.infer
import
ModelType
from
mmengine.infer.infer
import
ModelType
from
mmengine.structures
import
InstanceData
from
mmengine.structures
import
InstanceData
from
mmdet3d.registry
import
INFERENCERS
from
mmdet3d.registry
import
INFERENCERS
from
mmdet3d.structures
import
(
CameraInstance3DBoxes
,
DepthInstance3DBoxes
,
Det3DDataSample
,
LiDARInstance3DBoxes
)
from
mmdet3d.utils
import
ConfigType
from
mmdet3d.utils
import
ConfigType
from
.base_3d_inferencer
import
Base3DInferencer
from
.base_3d_inferencer
import
Base3DInferencer
...
@@ -43,16 +48,6 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -43,16 +48,6 @@ class LidarDet3DInferencer(Base3DInferencer):
priority is palette -> config -> checkpoint. Defaults to 'none'.
priority is palette -> config -> checkpoint. Defaults to 'none'.
"""
"""
preprocess_kwargs
:
set
=
set
()
forward_kwargs
:
set
=
set
()
visualize_kwargs
:
set
=
{
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'img_out_dir'
}
postprocess_kwargs
:
set
=
{
'print_result'
,
'pred_out_file'
,
'return_datasample'
}
def
__init__
(
self
,
def
__init__
(
self
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
...
@@ -69,7 +64,7 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -69,7 +64,7 @@ class LidarDet3DInferencer(Base3DInferencer):
scope
=
scope
,
scope
=
scope
,
palette
=
palette
)
palette
=
palette
)
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
])
->
list
:
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
]
,
**
kwargs
)
->
list
:
"""Preprocess the inputs to a list.
"""Preprocess the inputs to a list.
Preprocess inputs to a list according to its type:
Preprocess inputs to a list according to its type:
...
@@ -87,7 +82,22 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -87,7 +82,22 @@ class LidarDet3DInferencer(Base3DInferencer):
Returns:
Returns:
list: List of input for the :meth:`preprocess`.
list: List of input for the :meth:`preprocess`.
"""
"""
return
super
().
_inputs_to_list
(
inputs
,
modality_key
=
'points'
)
if
isinstance
(
inputs
,
dict
)
and
isinstance
(
inputs
[
'points'
],
str
):
pcd
=
inputs
[
'points'
]
backend
=
get_file_backend
(
pcd
)
if
hasattr
(
backend
,
'isdir'
)
and
isdir
(
pcd
):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
filename_list
=
list_dir_or_file
(
pcd
,
list_dir
=
False
)
inputs
=
[{
'points'
:
join_path
(
pcd
,
filename
)
}
for
filename
in
filename_list
]
if
not
isinstance
(
inputs
,
(
list
,
tuple
)):
inputs
=
[
inputs
]
return
list
(
inputs
)
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
"""Initialize the test pipeline."""
"""Initialize the test pipeline."""
...
@@ -113,9 +123,10 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -113,9 +123,10 @@ class LidarDet3DInferencer(Base3DInferencer):
preds
:
PredType
,
preds
:
PredType
,
return_vis
:
bool
=
False
,
return_vis
:
bool
=
False
,
show
:
bool
=
False
,
show
:
bool
=
False
,
wait_time
:
int
=
0
,
wait_time
:
int
=
-
1
,
draw_pred
:
bool
=
True
,
draw_pred
:
bool
=
True
,
pred_score_thr
:
float
=
0.3
,
pred_score_thr
:
float
=
0.3
,
no_save_vis
:
bool
=
False
,
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
"""Visualize predictions.
"""Visualize predictions.
...
@@ -126,11 +137,13 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -126,11 +137,13 @@ class LidarDet3DInferencer(Base3DInferencer):
Defaults to False.
Defaults to False.
show (bool): Whether to display the image in a popup window.
show (bool): Whether to display the image in a popup window.
Defaults to False.
Defaults to False.
wait_time (float): The interval of show (s). Defaults to
0
.
wait_time (float): The interval of show (s). Defaults to
-1
.
draw_pred (bool): Whether to draw predicted bounding boxes.
draw_pred (bool): Whether to draw predicted bounding boxes.
Defaults to True.
Defaults to True.
pred_score_thr (float): Minimum score of bboxes to draw.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
Defaults to 0.3.
no_save_vis (bool): Whether to force not to save prediction
vis results. Defaults to False.
img_out_dir (str): Output directory of visualization results.
img_out_dir (str): Output directory of visualization results.
If left as empty, no file will be saved. Defaults to ''.
If left as empty, no file will be saved. Defaults to ''.
...
@@ -138,8 +151,10 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -138,8 +151,10 @@ class LidarDet3DInferencer(Base3DInferencer):
List[np.ndarray] or None: Returns visualization results only if
List[np.ndarray] or None: Returns visualization results only if
applicable.
applicable.
"""
"""
if
self
.
visualizer
is
None
or
(
not
show
and
img_out_dir
==
''
if
no_save_vis
is
True
:
and
not
return_vis
):
img_out_dir
=
''
if
not
show
and
img_out_dir
==
''
and
not
return_vis
:
return
None
return
None
if
getattr
(
self
,
'visualizer'
)
is
None
:
if
getattr
(
self
,
'visualizer'
)
is
None
:
...
@@ -160,13 +175,16 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -160,13 +175,16 @@ class LidarDet3DInferencer(Base3DInferencer):
elif
isinstance
(
single_input
,
np
.
ndarray
):
elif
isinstance
(
single_input
,
np
.
ndarray
):
points
=
single_input
.
copy
()
points
=
single_input
.
copy
()
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_name
=
f
'
pc_
{
pc_num
}
.png'
pc_name
=
f
'
{
pc_num
}
.png'
else
:
else
:
raise
ValueError
(
'Unsupported input type: '
raise
ValueError
(
'Unsupported input type: '
f
'
{
type
(
single_input
)
}
'
)
f
'
{
type
(
single_input
)
}
'
)
o3d_save_path
=
osp
.
join
(
img_out_dir
,
pc_name
)
\
if
img_out_dir
!=
''
and
show
:
if
img_out_dir
!=
''
else
None
o3d_save_path
=
osp
.
join
(
img_out_dir
,
'vis_lidar'
,
pc_name
)
mmengine
.
mkdir_or_exist
(
osp
.
dirname
(
o3d_save_path
))
else
:
o3d_save_path
=
None
data_input
=
dict
(
points
=
points
)
data_input
=
dict
(
points
=
points
)
self
.
visualizer
.
add_datasample
(
self
.
visualizer
.
add_datasample
(
...
@@ -185,3 +203,40 @@ class LidarDet3DInferencer(Base3DInferencer):
...
@@ -185,3 +203,40 @@ class LidarDet3DInferencer(Base3DInferencer):
self
.
num_visualized_frames
+=
1
self
.
num_visualized_frames
+=
1
return
results
return
results
def
visualize_preds_fromfile
(
self
,
inputs
:
InputsType
,
preds
:
PredType
,
**
kwargs
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
"""Visualize predictions from `*.json` files.
Args:
inputs (InputsType): Inputs for the inferencer.
preds (PredType): Predictions of the model.
Returns:
List[np.ndarray] or None: Returns visualization results only if
applicable.
"""
data_samples
=
[]
for
pred
in
preds
:
pred
=
mmengine
.
load
(
pred
)
data_sample
=
Det3DDataSample
()
data_sample
.
pred_instances_3d
=
InstanceData
()
data_sample
.
pred_instances_3d
.
labels_3d
=
torch
.
tensor
(
pred
[
'labels_3d'
])
data_sample
.
pred_instances_3d
.
scores_3d
=
torch
.
tensor
(
pred
[
'scores_3d'
])
if
pred
[
'box_type_3d'
]
==
'LiDAR'
:
data_sample
.
pred_instances_3d
.
bboxes_3d
=
\
LiDARInstance3DBoxes
(
pred
[
'bboxes_3d'
])
elif
pred
[
'box_type_3d'
]
==
'Camera'
:
data_sample
.
pred_instances_3d
.
bboxes_3d
=
\
CameraInstance3DBoxes
(
pred
[
'bboxes_3d'
])
elif
pred
[
'box_type_3d'
]
==
'Depth'
:
data_sample
.
pred_instances_3d
.
bboxes_3d
=
\
DepthInstance3DBoxes
(
pred
[
'bboxes_3d'
])
else
:
raise
ValueError
(
'Unsupported box type: '
f
'
{
pred
[
"box_type_3d"
]
}
'
)
data_samples
.
append
(
data_sample
)
return
self
.
visualize
(
inputs
=
inputs
,
preds
=
data_samples
,
**
kwargs
)
mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
View file @
ac289b35
...
@@ -5,6 +5,8 @@ from typing import Dict, List, Optional, Sequence, Union
...
@@ -5,6 +5,8 @@ from typing import Dict, List, Optional, Sequence, Union
import
mmengine
import
mmengine
import
numpy
as
np
import
numpy
as
np
from
mmengine.dataset
import
Compose
from
mmengine.dataset
import
Compose
from
mmengine.fileio
import
(
get_file_backend
,
isdir
,
join_path
,
list_dir_or_file
)
from
mmengine.infer.infer
import
ModelType
from
mmengine.infer.infer
import
ModelType
from
mmengine.structures
import
InstanceData
from
mmengine.structures
import
InstanceData
...
@@ -43,16 +45,6 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -43,16 +45,6 @@ class LidarSeg3DInferencer(Base3DInferencer):
priority is palette -> config -> checkpoint. Defaults to 'none'.
priority is palette -> config -> checkpoint. Defaults to 'none'.
"""
"""
preprocess_kwargs
:
set
=
set
()
forward_kwargs
:
set
=
set
()
visualize_kwargs
:
set
=
{
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'img_out_dir'
}
postprocess_kwargs
:
set
=
{
'print_result'
,
'pred_out_file'
,
'return_datasample'
}
def
__init__
(
self
,
def
__init__
(
self
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
...
@@ -69,7 +61,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -69,7 +61,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
scope
=
scope
,
scope
=
scope
,
palette
=
palette
)
palette
=
palette
)
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
])
->
list
:
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
]
,
**
kwargs
)
->
list
:
"""Preprocess the inputs to a list.
"""Preprocess the inputs to a list.
Preprocess inputs to a list according to its type:
Preprocess inputs to a list according to its type:
...
@@ -87,7 +79,22 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -87,7 +79,22 @@ class LidarSeg3DInferencer(Base3DInferencer):
Returns:
Returns:
list: List of input for the :meth:`preprocess`.
list: List of input for the :meth:`preprocess`.
"""
"""
return
super
().
_inputs_to_list
(
inputs
,
modality_key
=
'points'
)
if
isinstance
(
inputs
,
dict
)
and
isinstance
(
inputs
[
'points'
],
str
):
pcd
=
inputs
[
'points'
]
backend
=
get_file_backend
(
pcd
)
if
hasattr
(
backend
,
'isdir'
)
and
isdir
(
pcd
):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
filename_list
=
list_dir_or_file
(
pcd
,
list_dir
=
False
)
inputs
=
[{
'points'
:
join_path
(
pcd
,
filename
)
}
for
filename
in
filename_list
]
if
not
isinstance
(
inputs
,
(
list
,
tuple
)):
inputs
=
[
inputs
]
return
list
(
inputs
)
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
"""Initialize the test pipeline."""
"""Initialize the test pipeline."""
...
@@ -124,6 +131,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -124,6 +131,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
wait_time
:
int
=
0
,
wait_time
:
int
=
0
,
draw_pred
:
bool
=
True
,
draw_pred
:
bool
=
True
,
pred_score_thr
:
float
=
0.3
,
pred_score_thr
:
float
=
0.3
,
no_save_vis
:
bool
=
False
,
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
"""Visualize predictions.
"""Visualize predictions.
...
@@ -139,6 +147,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -139,6 +147,7 @@ class LidarSeg3DInferencer(Base3DInferencer):
Defaults to True.
Defaults to True.
pred_score_thr (float): Minimum score of bboxes to draw.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
Defaults to 0.3.
no_save_vis (bool): Whether to save visualization results.
img_out_dir (str): Output directory of visualization results.
img_out_dir (str): Output directory of visualization results.
If left as empty, no file will be saved. Defaults to ''.
If left as empty, no file will be saved. Defaults to ''.
...
@@ -146,8 +155,10 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -146,8 +155,10 @@ class LidarSeg3DInferencer(Base3DInferencer):
List[np.ndarray] or None: Returns visualization results only if
List[np.ndarray] or None: Returns visualization results only if
applicable.
applicable.
"""
"""
if
self
.
visualizer
is
None
or
(
not
show
and
img_out_dir
==
''
if
no_save_vis
is
True
:
and
not
return_vis
):
img_out_dir
=
''
if
not
show
and
img_out_dir
==
''
and
not
return_vis
:
return
None
return
None
if
getattr
(
self
,
'visualizer'
)
is
None
:
if
getattr
(
self
,
'visualizer'
)
is
None
:
...
@@ -168,13 +179,16 @@ class LidarSeg3DInferencer(Base3DInferencer):
...
@@ -168,13 +179,16 @@ class LidarSeg3DInferencer(Base3DInferencer):
elif
isinstance
(
single_input
,
np
.
ndarray
):
elif
isinstance
(
single_input
,
np
.
ndarray
):
points
=
single_input
.
copy
()
points
=
single_input
.
copy
()
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_name
=
f
'
pc_
{
pc_num
}
.png'
pc_name
=
f
'
{
pc_num
}
.png'
else
:
else
:
raise
ValueError
(
'Unsupported input type: '
raise
ValueError
(
'Unsupported input type: '
f
'
{
type
(
single_input
)
}
'
)
f
'
{
type
(
single_input
)
}
'
)
o3d_save_path
=
osp
.
join
(
img_out_dir
,
pc_name
)
\
if
img_out_dir
!=
''
and
show
:
if
img_out_dir
!=
''
else
None
o3d_save_path
=
osp
.
join
(
img_out_dir
,
'vis_lidar'
,
pc_name
)
mmengine
.
mkdir_or_exist
(
osp
.
dirname
(
o3d_save_path
))
else
:
o3d_save_path
=
None
data_input
=
dict
(
points
=
points
)
data_input
=
dict
(
points
=
points
)
self
.
visualizer
.
add_datasample
(
self
.
visualizer
.
add_datasample
(
...
...
mmdet3d/apis/inferencers/mono_det3d_inferencer.py
View file @
ac289b35
...
@@ -6,6 +6,8 @@ import mmcv
...
@@ -6,6 +6,8 @@ import mmcv
import
mmengine
import
mmengine
import
numpy
as
np
import
numpy
as
np
from
mmengine.dataset
import
Compose
from
mmengine.dataset
import
Compose
from
mmengine.fileio
import
(
get_file_backend
,
isdir
,
join_path
,
list_dir_or_file
)
from
mmengine.infer.infer
import
ModelType
from
mmengine.infer.infer
import
ModelType
from
mmengine.structures
import
InstanceData
from
mmengine.structures
import
InstanceData
...
@@ -44,16 +46,6 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -44,16 +46,6 @@ class MonoDet3DInferencer(Base3DInferencer):
priority is palette -> config -> checkpoint. Defaults to 'none'.
priority is palette -> config -> checkpoint. Defaults to 'none'.
"""
"""
preprocess_kwargs
:
set
=
set
()
forward_kwargs
:
set
=
set
()
visualize_kwargs
:
set
=
{
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'img_out_dir'
}
postprocess_kwargs
:
set
=
{
'print_result'
,
'pred_out_file'
,
'return_datasample'
}
def
__init__
(
self
,
def
__init__
(
self
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
...
@@ -70,7 +62,10 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -70,7 +62,10 @@ class MonoDet3DInferencer(Base3DInferencer):
scope
=
scope
,
scope
=
scope
,
palette
=
palette
)
palette
=
palette
)
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
])
->
list
:
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
],
cam_type
=
'CAM2'
,
**
kwargs
)
->
list
:
"""Preprocess the inputs to a list.
"""Preprocess the inputs to a list.
Preprocess inputs to a list according to its type:
Preprocess inputs to a list according to its type:
...
@@ -88,7 +83,79 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -88,7 +83,79 @@ class MonoDet3DInferencer(Base3DInferencer):
Returns:
Returns:
list: List of input for the :meth:`preprocess`.
list: List of input for the :meth:`preprocess`.
"""
"""
return
super
().
_inputs_to_list
(
inputs
,
modality_key
=
'img'
)
if
isinstance
(
inputs
,
dict
):
assert
'infos'
in
inputs
infos
=
inputs
.
pop
(
'infos'
)
if
isinstance
(
inputs
[
'img'
],
str
):
img
=
inputs
[
'img'
]
backend
=
get_file_backend
(
img
)
if
hasattr
(
backend
,
'isdir'
)
and
isdir
(
img
):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
filename_list
=
list_dir_or_file
(
img
,
list_dir
=
False
)
inputs
=
[{
'img'
:
join_path
(
img
,
filename
)
}
for
filename
in
filename_list
]
if
not
isinstance
(
inputs
,
(
list
,
tuple
)):
inputs
=
[
inputs
]
# get cam2img, lidar2cam and lidar2img from infos
info_list
=
mmengine
.
load
(
infos
)[
'data_list'
]
assert
len
(
info_list
)
==
len
(
inputs
)
for
index
,
input
in
enumerate
(
inputs
):
data_info
=
info_list
[
index
]
img_path
=
data_info
[
'images'
][
cam_type
][
'img_path'
]
if
isinstance
(
input
[
'img'
],
str
)
and
\
osp
.
basename
(
img_path
)
!=
osp
.
basename
(
input
[
'img'
]):
raise
ValueError
(
f
'the info file of
{
img_path
}
is not provided.'
)
cam2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'cam2img'
],
dtype
=
np
.
float32
)
lidar2cam
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2cam'
],
dtype
=
np
.
float32
)
if
'lidar2img'
in
data_info
[
'images'
][
cam_type
]:
lidar2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2img'
],
dtype
=
np
.
float32
)
else
:
lidar2img
=
cam2img
@
lidar2cam
input
[
'cam2img'
]
=
cam2img
input
[
'lidar2cam'
]
=
lidar2cam
input
[
'lidar2img'
]
=
lidar2img
elif
isinstance
(
inputs
,
(
list
,
tuple
)):
# get cam2img, lidar2cam and lidar2img from infos
for
input
in
inputs
:
assert
'infos'
in
input
infos
=
input
.
pop
(
'infos'
)
info_list
=
mmengine
.
load
(
infos
)[
'data_list'
]
assert
len
(
info_list
)
==
1
,
'Only support single sample info'
\
'in `.pkl`, when inputs is a list.'
data_info
=
info_list
[
0
]
img_path
=
data_info
[
'images'
][
cam_type
][
'img_path'
]
if
isinstance
(
input
[
'img'
],
str
)
and
\
osp
.
basename
(
img_path
)
!=
osp
.
basename
(
input
[
'img'
]):
raise
ValueError
(
f
'the info file of
{
img_path
}
is not provided.'
)
cam2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'cam2img'
],
dtype
=
np
.
float32
)
lidar2cam
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2cam'
],
dtype
=
np
.
float32
)
if
'lidar2img'
in
data_info
[
'images'
][
cam_type
]:
lidar2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2img'
],
dtype
=
np
.
float32
)
else
:
lidar2img
=
cam2img
@
lidar2cam
input
[
'cam2img'
]
=
cam2img
input
[
'lidar2cam'
]
=
lidar2cam
input
[
'lidar2img'
]
=
lidar2img
return
list
(
inputs
)
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
"""Initialize the test pipeline."""
"""Initialize the test pipeline."""
...
@@ -110,7 +177,9 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -110,7 +177,9 @@ class MonoDet3DInferencer(Base3DInferencer):
wait_time
:
int
=
0
,
wait_time
:
int
=
0
,
draw_pred
:
bool
=
True
,
draw_pred
:
bool
=
True
,
pred_score_thr
:
float
=
0.3
,
pred_score_thr
:
float
=
0.3
,
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
no_save_vis
:
bool
=
False
,
img_out_dir
:
str
=
''
,
cam_type_dir
:
str
=
'CAM2'
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
"""Visualize predictions.
"""Visualize predictions.
Args:
Args:
...
@@ -125,15 +194,19 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -125,15 +194,19 @@ class MonoDet3DInferencer(Base3DInferencer):
Defaults to True.
Defaults to True.
pred_score_thr (float): Minimum score of bboxes to draw.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
Defaults to 0.3.
no_save_vis (bool): Whether to save visualization results.
img_out_dir (str): Output directory of visualization results.
img_out_dir (str): Output directory of visualization results.
If left as empty, no file will be saved. Defaults to ''.
If left as empty, no file will be saved. Defaults to ''.
cam_type_dir (str): Camera type directory. Defaults to 'CAM2'.
Returns:
Returns:
List[np.ndarray] or None: Returns visualization results only if
List[np.ndarray] or None: Returns visualization results only if
applicable.
applicable.
"""
"""
if
self
.
visualizer
is
None
or
(
not
show
and
img_out_dir
==
''
if
no_save_vis
is
True
:
and
not
return_vis
):
img_out_dir
=
''
if
not
show
and
img_out_dir
==
''
and
not
return_vis
:
return
None
return
None
if
getattr
(
self
,
'visualizer'
)
is
None
:
if
getattr
(
self
,
'visualizer'
)
is
None
:
...
@@ -156,8 +229,8 @@ class MonoDet3DInferencer(Base3DInferencer):
...
@@ -156,8 +229,8 @@ class MonoDet3DInferencer(Base3DInferencer):
raise
ValueError
(
'Unsupported input type: '
raise
ValueError
(
'Unsupported input type: '
f
"
{
type
(
single_input
[
'img'
])
}
"
)
f
"
{
type
(
single_input
[
'img'
])
}
"
)
out_file
=
osp
.
join
(
img_out_dir
,
img_name
)
if
img_out_dir
!=
''
\
out_file
=
osp
.
join
(
img_out_dir
,
'vis_camera'
,
cam_type_dir
,
else
None
img_name
)
if
img_out_dir
!=
''
else
None
data_input
=
dict
(
img
=
img
)
data_input
=
dict
(
img
=
img
)
self
.
visualizer
.
add_datasample
(
self
.
visualizer
.
add_datasample
(
...
...
mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
View file @
ac289b35
...
@@ -7,6 +7,8 @@ import mmcv
...
@@ -7,6 +7,8 @@ import mmcv
import
mmengine
import
mmengine
import
numpy
as
np
import
numpy
as
np
from
mmengine.dataset
import
Compose
from
mmengine.dataset
import
Compose
from
mmengine.fileio
import
(
get_file_backend
,
isdir
,
join_path
,
list_dir_or_file
)
from
mmengine.infer.infer
import
ModelType
from
mmengine.infer.infer
import
ModelType
from
mmengine.structures
import
InstanceData
from
mmengine.structures
import
InstanceData
...
@@ -44,16 +46,6 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -44,16 +46,6 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
palette (str): The palette of visualization. Defaults to 'none'.
palette (str): The palette of visualization. Defaults to 'none'.
"""
"""
preprocess_kwargs
:
set
=
set
()
forward_kwargs
:
set
=
set
()
visualize_kwargs
:
set
=
{
'return_vis'
,
'show'
,
'wait_time'
,
'draw_pred'
,
'pred_score_thr'
,
'img_out_dir'
}
postprocess_kwargs
:
set
=
{
'print_result'
,
'pred_out_file'
,
'return_datasample'
}
def
__init__
(
self
,
def
__init__
(
self
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
model
:
Union
[
ModelType
,
str
,
None
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
weights
:
Optional
[
str
]
=
None
,
...
@@ -70,7 +62,10 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -70,7 +62,10 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
scope
=
scope
,
scope
=
scope
,
palette
=
palette
)
palette
=
palette
)
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
])
->
list
:
def
_inputs_to_list
(
self
,
inputs
:
Union
[
dict
,
list
],
cam_type
:
str
=
'CAM2'
,
**
kwargs
)
->
list
:
"""Preprocess the inputs to a list.
"""Preprocess the inputs to a list.
Preprocess inputs to a list according to its type:
Preprocess inputs to a list according to its type:
...
@@ -88,7 +83,86 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -88,7 +83,86 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
Returns:
Returns:
list: List of input for the :meth:`preprocess`.
list: List of input for the :meth:`preprocess`.
"""
"""
return
super
().
_inputs_to_list
(
inputs
,
modality_key
=
[
'points'
,
'img'
])
if
isinstance
(
inputs
,
dict
):
assert
'infos'
in
inputs
infos
=
inputs
.
pop
(
'infos'
)
if
isinstance
(
inputs
[
'img'
],
str
):
img
,
pcd
=
inputs
[
'img'
],
inputs
[
'points'
]
backend
=
get_file_backend
(
img
)
if
hasattr
(
backend
,
'isdir'
)
and
isdir
(
img
)
and
isdir
(
pcd
):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
img_filename_list
=
list_dir_or_file
(
img
,
list_dir
=
False
,
suffix
=
[
'.png'
,
'.jpg'
])
pcd_filename_list
=
list_dir_or_file
(
pcd
,
list_dir
=
False
,
suffix
=
'.bin'
)
assert
len
(
img_filename_list
)
==
len
(
pcd_filename_list
)
inputs
=
[{
'img'
:
join_path
(
img
,
img_filename
),
'points'
:
join_path
(
pcd
,
pcd_filename
)
}
for
pcd_filename
,
img_filename
in
zip
(
pcd_filename_list
,
img_filename_list
)]
if
not
isinstance
(
inputs
,
(
list
,
tuple
)):
inputs
=
[
inputs
]
# get cam2img, lidar2cam and lidar2img from infos
info_list
=
mmengine
.
load
(
infos
)[
'data_list'
]
assert
len
(
info_list
)
==
len
(
inputs
)
for
index
,
input
in
enumerate
(
inputs
):
data_info
=
info_list
[
index
]
img_path
=
data_info
[
'images'
][
cam_type
][
'img_path'
]
if
isinstance
(
input
[
'img'
],
str
)
and
\
osp
.
basename
(
img_path
)
!=
osp
.
basename
(
input
[
'img'
]):
raise
ValueError
(
f
'the info file of
{
img_path
}
is not provided.'
)
cam2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'cam2img'
],
dtype
=
np
.
float32
)
lidar2cam
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2cam'
],
dtype
=
np
.
float32
)
if
'lidar2img'
in
data_info
[
'images'
][
cam_type
]:
lidar2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2img'
],
dtype
=
np
.
float32
)
else
:
lidar2img
=
cam2img
@
lidar2cam
input
[
'cam2img'
]
=
cam2img
input
[
'lidar2cam'
]
=
lidar2cam
input
[
'lidar2img'
]
=
lidar2img
elif
isinstance
(
inputs
,
(
list
,
tuple
)):
# get cam2img, lidar2cam and lidar2img from infos
for
input
in
inputs
:
assert
'infos'
in
input
infos
=
input
.
pop
(
'infos'
)
info_list
=
mmengine
.
load
(
infos
)[
'data_list'
]
assert
len
(
info_list
)
==
1
,
'Only support single sample'
\
'info in `.pkl`, when input is a list.'
data_info
=
info_list
[
0
]
img_path
=
data_info
[
'images'
][
cam_type
][
'img_path'
]
if
isinstance
(
input
[
'img'
],
str
)
and
\
osp
.
basename
(
img_path
)
!=
osp
.
basename
(
input
[
'img'
]):
raise
ValueError
(
f
'the info file of
{
img_path
}
is not provided.'
)
cam2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'cam2img'
],
dtype
=
np
.
float32
)
lidar2cam
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2cam'
],
dtype
=
np
.
float32
)
if
'lidar2img'
in
data_info
[
'images'
][
cam_type
]:
lidar2img
=
np
.
asarray
(
data_info
[
'images'
][
cam_type
][
'lidar2img'
],
dtype
=
np
.
float32
)
else
:
lidar2img
=
cam2img
@
lidar2cam
input
[
'cam2img'
]
=
cam2img
input
[
'lidar2cam'
]
=
lidar2cam
input
[
'lidar2img'
]
=
lidar2img
return
list
(
inputs
)
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
def
_init_pipeline
(
self
,
cfg
:
ConfigType
)
->
Compose
:
"""Initialize the test pipeline."""
"""Initialize the test pipeline."""
...
@@ -144,7 +218,9 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -144,7 +218,9 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
wait_time
:
int
=
0
,
wait_time
:
int
=
0
,
draw_pred
:
bool
=
True
,
draw_pred
:
bool
=
True
,
pred_score_thr
:
float
=
0.3
,
pred_score_thr
:
float
=
0.3
,
img_out_dir
:
str
=
''
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
no_save_vis
:
bool
=
False
,
img_out_dir
:
str
=
''
,
cam_type_dir
:
str
=
'CAM2'
)
->
Union
[
List
[
np
.
ndarray
],
None
]:
"""Visualize predictions.
"""Visualize predictions.
Args:
Args:
...
@@ -157,6 +233,7 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -157,6 +233,7 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
wait_time (float): The interval of show (s). Defaults to 0.
wait_time (float): The interval of show (s). Defaults to 0.
draw_pred (bool): Whether to draw predicted bounding boxes.
draw_pred (bool): Whether to draw predicted bounding boxes.
Defaults to True.
Defaults to True.
no_save_vis (bool): Whether to save visualization results.
pred_score_thr (float): Minimum score of bboxes to draw.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
Defaults to 0.3.
img_out_dir (str): Output directory of visualization results.
img_out_dir (str): Output directory of visualization results.
...
@@ -166,8 +243,10 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -166,8 +243,10 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
List[np.ndarray] or None: Returns visualization results only if
List[np.ndarray] or None: Returns visualization results only if
applicable.
applicable.
"""
"""
if
self
.
visualizer
is
None
or
(
not
show
and
img_out_dir
==
''
if
no_save_vis
is
True
:
and
not
return_vis
):
img_out_dir
=
''
if
not
show
and
img_out_dir
==
''
and
not
return_vis
:
return
None
return
None
if
getattr
(
self
,
'visualizer'
)
is
None
:
if
getattr
(
self
,
'visualizer'
)
is
None
:
...
@@ -188,13 +267,16 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -188,13 +267,16 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
elif
isinstance
(
points_input
,
np
.
ndarray
):
elif
isinstance
(
points_input
,
np
.
ndarray
):
points
=
points_input
.
copy
()
points
=
points_input
.
copy
()
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_num
=
str
(
self
.
num_visualized_frames
).
zfill
(
8
)
pc_name
=
f
'
pc_
{
pc_num
}
.png'
pc_name
=
f
'
{
pc_num
}
.png'
else
:
else
:
raise
ValueError
(
'Unsupported input type: '
raise
ValueError
(
'Unsupported input type: '
f
'
{
type
(
points_input
)
}
'
)
f
'
{
type
(
points_input
)
}
'
)
o3d_save_path
=
osp
.
join
(
img_out_dir
,
pc_name
)
\
if
img_out_dir
!=
''
and
show
:
if
img_out_dir
!=
''
else
None
o3d_save_path
=
osp
.
join
(
img_out_dir
,
'vis_lidar'
,
pc_name
)
mmengine
.
mkdir_or_exist
(
osp
.
dirname
(
o3d_save_path
))
else
:
o3d_save_path
=
None
img_input
=
single_input
[
'img'
]
img_input
=
single_input
[
'img'
]
if
isinstance
(
single_input
[
'img'
],
str
):
if
isinstance
(
single_input
[
'img'
],
str
):
...
@@ -210,8 +292,8 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
...
@@ -210,8 +292,8 @@ class MultiModalityDet3DInferencer(Base3DInferencer):
raise
ValueError
(
'Unsupported input type: '
raise
ValueError
(
'Unsupported input type: '
f
'
{
type
(
img_input
)
}
'
)
f
'
{
type
(
img_input
)
}
'
)
out_file
=
osp
.
join
(
img_out_dir
,
img_name
)
if
img_out_dir
!=
''
\
out_file
=
osp
.
join
(
img_out_dir
,
'vis_camera'
,
cam_type_dir
,
else
None
img_name
)
if
img_out_dir
!=
''
else
None
data_input
=
dict
(
points
=
points
,
img
=
img
)
data_input
=
dict
(
points
=
points
,
img
=
img
)
self
.
visualizer
.
add_datasample
(
self
.
visualizer
.
add_datasample
(
...
...
mmdet3d/configs/_base_/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/_base_/datasets/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/_base_/models/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/_base_/models/cylinder3d.py
0 → 100644
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Base model config for Cylinder3D LiDAR semantic segmentation
# (mmengine "pure Python" config style: classes are referenced directly
# instead of registry strings).
from mmdet3d.models import Cylinder3D
from mmdet3d.models.backbones import Asymm3DSpconv
from mmdet3d.models.data_preprocessors import Det3DDataPreprocessor
from mmdet3d.models.decode_heads.cylinder3d_head import Cylinder3DHead
from mmdet3d.models.losses import LovaszLoss
from mmdet3d.models.voxel_encoders import SegVFE

# Cylindrical voxel grid resolution (radius, angle, height bins).
# Shared by the voxelization layer and the sparse-conv backbone below.
grid_shape = [480, 360, 32]
model = dict(
    type=Cylinder3D,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        # Cylindrical (rho/phi/z) partition instead of cartesian voxels.
        voxel_type='cylindrical',
        voxel_layer=dict(
            grid_shape=grid_shape,
            # [rho_min, phi_min, z_min, rho_max, phi_max, z_max];
            # phi spans the full circle (-pi, pi).
            point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
            # -1 disables the caps: keep every point and every voxel.
            max_num_points=-1,
            max_voxels=-1,
        ),
    ),
    voxel_encoder=dict(
        type=SegVFE,
        feat_channels=[64, 128, 256, 256],
        in_channels=6,
        with_voxel_center=True,
        # Compress per-voxel features to 16 channels before the backbone
        # (matches `input_channels` of the backbone below).
        feat_compression=16,
        return_point_feats=False),
    backbone=dict(
        type=Asymm3DSpconv,
        grid_size=grid_shape,
        input_channels=16,
        base_channels=32,
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)),
    decode_head=dict(
        type=Cylinder3DHead,
        channels=128,
        # NOTE(review): 20 classes — presumably SemanticKITTI's label set;
        # confirm against the dataset config that inherits this file.
        num_classes=20,
        # Combined cross-entropy + Lovasz objective, weighted equally.
        loss_ce=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,
            loss_weight=1.0),
        loss_lovasz=dict(type=LovaszLoss, loss_weight=1.0, reduction='none'),
    ),
    train_cfg=None,
    # Whole-scene inference (no sliding-window cropping).
    test_cfg=dict(mode='whole'),
)
mmdet3d/configs/_base_/schedules/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/centerpoint/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
if
'_base_'
:
from
mmengine.config
import
read_base
with
read_base
():
from
.._base_.datasets.nus_3d
import
*
from
.._base_.datasets.nus_3d
import
*
from
.._base_.models.centerpoint_pillar02_second_secfpn_nus
import
*
from
.._base_.models.centerpoint_pillar02_second_secfpn_nus
import
*
from
.._base_.schedules.cyclic_20e
import
*
from
.._base_.schedules.cyclic_20e
import
*
...
@@ -29,9 +31,9 @@ class_names = [
...
@@ -29,9 +31,9 @@ class_names = [
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
]
data_prefix
.
merg
e
(
data_prefix
.
updat
e
(
dict
(
pts
=
'samples/LIDAR_TOP'
,
img
=
''
,
sweeps
=
'sweeps/LIDAR_TOP'
))
dict
(
pts
=
'samples/LIDAR_TOP'
,
img
=
''
,
sweeps
=
'sweeps/LIDAR_TOP'
))
model
.
merg
e
(
model
.
updat
e
(
dict
(
dict
(
data_preprocessor
=
dict
(
data_preprocessor
=
dict
(
voxel_layer
=
dict
(
point_cloud_range
=
point_cloud_range
)),
voxel_layer
=
dict
(
point_cloud_range
=
point_cloud_range
)),
...
@@ -167,13 +169,13 @@ train_dataloader.merge(
...
@@ -167,13 +169,13 @@ train_dataloader.merge(
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
,
box_type_3d
=
'LiDAR'
,
backend_args
=
backend_args
))))
backend_args
=
backend_args
))))
test_dataloader
.
merg
e
(
test_dataloader
.
updat
e
(
dict
(
dict
(
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
val_dataloader
.
merg
e
(
val_dataloader
.
updat
e
(
dict
(
dict
(
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
train_cfg
.
merg
e
(
dict
(
val_interval
=
20
))
train_cfg
.
updat
e
(
dict
(
val_interval
=
20
))
mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
if
'_base_'
:
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.nus_3d
import
*
from
.._base_.datasets.nus_3d
import
*
from
.._base_.models.centerpoint_voxel01_second_secfpn_nus
import
*
from
.._base_.models.centerpoint_voxel01_second_secfpn_nus
import
*
from
.._base_.schedules.cyclic_20e
import
*
from
.._base_.schedules.cyclic_20e
import
*
...
@@ -29,9 +31,9 @@ class_names = [
...
@@ -29,9 +31,9 @@ class_names = [
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
]
data_prefix
.
merg
e
(
data_prefix
.
updat
e
(
dict
(
pts
=
'samples/LIDAR_TOP'
,
img
=
''
,
sweeps
=
'sweeps/LIDAR_TOP'
))
dict
(
pts
=
'samples/LIDAR_TOP'
,
img
=
''
,
sweeps
=
'sweeps/LIDAR_TOP'
))
model
.
merg
e
(
model
.
updat
e
(
dict
(
dict
(
data_preprocessor
=
dict
(
data_preprocessor
=
dict
(
voxel_layer
=
dict
(
point_cloud_range
=
point_cloud_range
)),
voxel_layer
=
dict
(
point_cloud_range
=
point_cloud_range
)),
...
@@ -167,13 +169,13 @@ train_dataloader.merge(
...
@@ -167,13 +169,13 @@ train_dataloader.merge(
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
,
box_type_3d
=
'LiDAR'
,
backend_args
=
backend_args
))))
backend_args
=
backend_args
))))
test_dataloader
.
merg
e
(
test_dataloader
.
updat
e
(
dict
(
dict
(
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
val_dataloader
.
merg
e
(
val_dataloader
.
updat
e
(
dict
(
dict
(
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
pipeline
=
test_pipeline
,
metainfo
=
dict
(
classes
=
class_names
))))
train_cfg
.
merg
e
(
dict
(
val_interval
=
20
))
train_cfg
.
updat
e
(
dict
(
val_interval
=
20
))
mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
0 → 100644
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Cylinder3D on SemanticKITTI: 4 GPUs x batch 4, 36-epoch ("3x") schedule.
from mmengine import read_base

with read_base():
    from .._base_.datasets.semantickitti import *
    from .._base_.models.cylinder3d import *
    from .._base_.default_runtime import *

from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim import AdamW

# optimizer
lr = 0.001
optim_wrapper = dict(
    type=OptimWrapper, optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01))

# Train for 36 epochs, validating after every epoch.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=36, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# learning rate: linear warmup for the first 1000 iters, then a single
# 10x step-down at epoch 30.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0,
         end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=36,
        by_epoch=True,
        milestones=[30],
        gamma=0.1)
]

# Override the base dataset's per-GPU batch size.
train_dataloader.update(dict(batch_size=4, ))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
# auto_scale_lr = dict(enable=False, base_batch_size=32)

# Save a checkpoint every 5 epochs (CheckpointHook comes from the
# inherited default_runtime config).
default_hooks.update(dict(checkpoint=dict(type=CheckpointHook, interval=5)))
mmdet3d/configs/cylinder3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
0 → 100644
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Cylinder3D on SemanticKITTI with LaserMix/PolarMix point-cloud
# augmentation (8 GPUs x batch 2, "3x" schedule from the base files).
from mmengine import read_base

with read_base():
    from .._base_.datasets.semantickitti import *
    from .._base_.default_runtime import *
    from .._base_.models.cylinder3d import *
    from .._base_.schedules.schedule_3x import *

from mmcv.transforms.wrappers import RandomChoice
from mmdet3d.datasets.transforms.transforms_3d import LaserMix, PolarMix

train_pipeline = [
    dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(
        type=LoadAnnotations3D,
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        # SemanticKITTI packs instance id in the upper 16 bits of the
        # label; the offset strips it to recover the semantic class.
        seg_3d_dtype='np.int32',
        seg_offset=2**16,
        dataset_type='semantickitti'),
    dict(type=PointSegClassMapping),
    # Apply exactly one of LaserMix / PolarMix per sample, 50/50.
    # Each mix transform loads its second (paste) scan with the same
    # pre_transform steps as the main pipeline above.
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=LaserMix,
                    num_areas=[3, 4, 5, 6],
                    pitch_angles=[-25, 3],
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
            [
                dict(
                    type=PolarMix,
                    # NOTE(review): classes 0-7 are treated as "instance"
                    # (thing) classes to rotate-paste — confirm against the
                    # SemanticKITTI class mapping.
                    instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
                    swap_ratio=0.5,
                    rotate_paste_ratio=1.0,
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
        ],
        prob=[0.5, 0.5]),
    # Random full-circle rotation plus mild scaling; no translation.
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[0., 6.28318531],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
    ),
    dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]

train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))

# Save a checkpoint every epoch.
default_hooks.update(dict(checkpoint=dict(type=CheckpointHook, interval=1)))
mmdet3d/configs/minkunet/__init__.py
deleted
100644 → 0
View file @
b0e8ece9
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
View file @
ac289b35
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
if
'_base_'
:
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.semantickitti
import
*
from
.._base_.datasets.semantickitti
import
*
from
.._base_.models.minkunet
import
*
from
.._base_.models.minkunet
import
*
from
.._base_.schedules.schedule_3x
import
*
from
.._base_.schedules.schedule_3x
import
*
...
@@ -15,7 +17,7 @@ from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
...
@@ -15,7 +17,7 @@ from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
from
mmdet3d.datasets.transforms.transforms_3d
import
(
GlobalRotScaleTrans
,
from
mmdet3d.datasets.transforms.transforms_3d
import
(
GlobalRotScaleTrans
,
LaserMix
,
PolarMix
)
LaserMix
,
PolarMix
)
model
.
merg
e
(
model
.
updat
e
(
dict
(
dict
(
data_preprocessor
=
dict
(
max_voxels
=
None
),
data_preprocessor
=
dict
(
max_voxels
=
None
),
backbone
=
dict
(
encoder_blocks
=
[
2
,
3
,
4
,
6
])))
backbone
=
dict
(
encoder_blocks
=
[
2
,
3
,
4
,
6
])))
...
@@ -92,6 +94,6 @@ train_pipeline = [
...
@@ -92,6 +94,6 @@ train_pipeline = [
dict
(
type
=
Pack3DDetInputs
,
keys
=
[
'points'
,
'pts_semantic_mask'
])
dict
(
type
=
Pack3DDetInputs
,
keys
=
[
'points'
,
'pts_semantic_mask'
])
]
]
train_dataloader
.
merg
e
(
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
)))
train_dataloader
.
updat
e
(
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
)))
default_hooks
.
merg
e
(
dict
(
checkpoint
=
dict
(
type
=
CheckpointHook
,
interval
=
1
)))
default_hooks
.
updat
e
(
dict
(
checkpoint
=
dict
(
type
=
CheckpointHook
,
interval
=
1
)))
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment