Commit 32a4328b authored by Wenwei Zhang, committed via GitHub

Bump version to V1.0.0rc0

parents 86cc487c a8817998
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from os import path as osp
import mmcv
import numpy as np
import pandas as pd
from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
from lyft_dataset_sdk.utils.data_classes import Box as LyftBox
from pyquaternion import Quaternion
from mmdet3d.core.evaluation.lyft_eval import lyft_eval
......@@ -129,7 +130,7 @@ class LyftDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): sample index
......@@ -137,13 +138,13 @@ class LyftDataset(Custom3DDataset):
- sweeps (list[dict]): infos of sweeps
- timestamp (float): sample timestamp
- img_filename (str, optional): image filename
- lidar2img (list[np.ndarray], optional): transformations
from lidar to different cameras
- ann_info (dict): annotation info
"""
info = self.data_infos[index]
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
......@@ -190,7 +191,7 @@ class LyftDataset(Custom3DDataset):
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
......@@ -275,10 +276,11 @@ class LyftDataset(Custom3DDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
......@@ -312,18 +314,18 @@ class LyftDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to a csv file.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporary
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert isinstance(results, list), 'results must be a list'
......@@ -372,19 +374,22 @@ class LyftDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str, optional): The prefix of json files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str, optional): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to a csv file.
result_names (list[str], optional): Result names in the
metric prefix. Default: ['pts_bbox'].
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
......@@ -407,8 +412,8 @@ class LyftDataset(Custom3DDataset):
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return results_dict
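# Note on the logic above: visualization now runs when either `show=True`
# (online display) or `out_dir` is set (rendered results saved to disk).
# A minimal usage sketch (assuming a built `dataset` and a `results` list
# collected from testing):
#
#   dataset.evaluate(results, metric='bbox', show=False, out_dir='./vis')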
def _build_default_pipeline(self):
......@@ -432,13 +437,14 @@ class LyftDataset(Custom3DDataset):
]
return Compose(pipeline)
def show(self, results, out_dir, show=False, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
......@@ -517,16 +523,16 @@ def output_to_lyft_box(detection):
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
box_yaw = -box_yaw - np.pi / 2
# our LiDAR coordinate system -> Lyft box coordinate system
lyft_box_dims = box_dims[:, [1, 0, 2]]
box_list = []
for i in range(len(box3d)):
quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
box = LyftBox(
box_gravity_center[i],
lyft_box_dims[i],
quat,
label=labels[i],
score=scores[i])
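# Numeric sketch of the conversion above (not part of this file): mmdet3d
# LiDAR boxes store dims as (x_size, y_size, z_size), while LyftBox expects
# (width, length, height), hence the [1, 0, 2] re-ordering:
#
#   import numpy as np
#   box_dims = np.array([[4.0, 2.0, 1.5]])    # (x_size, y_size, z_size)
#   lyft_box_dims = box_dims[:, [1, 0, 2]]    # -> [[2.0, 4.0, 1.5]]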
......
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
from os import path as osp
import mmcv
import numpy as np
import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from ..core import show_result
......@@ -48,8 +49,9 @@ class NuScenesDataset(Custom3DDataset):
Defaults to False.
eval_version (str, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
NameMapping = {
'movable_object.barrier': 'barrier',
......@@ -196,7 +198,7 @@ class NuScenesDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
......@@ -204,12 +206,12 @@ class NuScenesDataset(Custom3DDataset):
- sweeps (list[dict]): Infos of sweeps.
- timestamp (float): Sample timestamp.
- img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): Transformations
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
......@@ -256,7 +258,7 @@ class NuScenesDataset(Custom3DDataset):
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
......@@ -374,10 +376,11 @@ class NuScenesDataset(Custom3DDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
......@@ -427,14 +430,14 @@ class NuScenesDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporary
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert isinstance(results, list), 'results must be a list'
......@@ -480,15 +483,16 @@ class NuScenesDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str, optional): The prefix of json files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
......@@ -510,8 +514,8 @@ class NuScenesDataset(Custom3DDataset):
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return results_dict
def _build_default_pipeline(self):
......@@ -535,13 +539,14 @@ class NuScenesDataset(Custom3DDataset):
]
return Compose(pipeline)
def show(self, results, out_dir, show=False, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
......@@ -588,9 +593,9 @@ def output_to_nusc_box(detection):
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
box_yaw = -box_yaw - np.pi / 2
# our LiDAR coordinate system -> nuScenes box coordinate system
nus_box_dims = box_dims[:, [1, 0, 2]]
box_list = []
for i in range(len(box3d)):
......@@ -602,7 +607,7 @@ def output_to_nusc_box(detection):
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box = NuScenesBox(
box_gravity_center[i],
nus_box_dims[i],
quat,
label=labels[i],
score=scores[i],
......@@ -624,7 +629,7 @@ def lidar_nusc_box_to_global(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str, optional): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import tempfile
import warnings
from os import path as osp
import mmcv
import numpy as np
import pyquaternion
import torch
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet3d.core import bbox3d2result, box3d_multiclass_nms, xywhr2xyxyr
from mmdet.datasets import DATASETS, CocoDataset
......@@ -44,8 +45,9 @@ class NuScenesMonoDataset(CocoDataset):
- 'Camera': Box in camera coordinates.
eval_version (str, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
version (str, optional): Dataset version. Defaults to 'v1.0-trainval'.
"""
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
......@@ -140,8 +142,8 @@ class NuScenesMonoDataset(CocoDataset):
ann_info (list[dict]): Annotation info of an image.
Returns:
dict: A dict containing the following keys: bboxes, labels,
gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,
depths, bboxes_ignore, masks, seg_map
"""
gt_bboxes = []
......@@ -394,10 +396,11 @@ class NuScenesMonoDataset(CocoDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'img_bbox'.
Returns:
......@@ -448,13 +451,13 @@ class NuScenesMonoDataset(CocoDataset):
Args:
results (list[tuple | numpy.ndarray]): Testing results of the
dataset.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporary directory created
for saving json files when jsonfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'
......@@ -504,15 +507,18 @@ class NuScenesMonoDataset(CocoDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
result_names (list[str], optional): Result names in the
metric prefix. Default: ['img_bbox'].
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
......@@ -535,7 +541,7 @@ class NuScenesMonoDataset(CocoDataset):
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, pipeline=pipeline)
return results_dict
......@@ -576,7 +582,7 @@ class NuScenesMonoDataset(CocoDataset):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict]): Input pipeline. If None is given,
get from self.pipeline.
"""
if pipeline is None:
......@@ -601,13 +607,14 @@ class NuScenesMonoDataset(CocoDataset):
]
return Compose(pipeline)
def show(self, results, out_dir, show=False, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
......@@ -696,7 +703,7 @@ def cam_nusc_box_to_global(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str, optional): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
......@@ -736,7 +743,7 @@ def global_nusc_box_to_cam(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str, optional): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
......@@ -769,7 +776,7 @@ def nusc_box_to_cam_box3d(boxes):
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
Returns:
tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
Converted 3D bounding boxes, scores and labels.
"""
locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
......
......@@ -3,17 +3,19 @@ from mmdet.datasets.pipelines import Compose
from .dbsampler import DataBaseSampler
from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D
from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D,
LoadMultiViewImageFromFiles, LoadPointsFromDict,
LoadPointsFromFile, LoadPointsFromMultiSweeps,
NormalizePointsColor, PointSegClassMapping)
from .test_time_aug import MultiScaleFlipAug3D
# yapf: disable
from .transforms_3d import (AffineResize, BackgroundPointsFilter,
GlobalAlignment, GlobalRotScaleTrans,
IndoorPatchPointSample, IndoorPointSample,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomShiftScale,
VoxelBasedPointSampler)
__all__ = [
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
......@@ -25,5 +27,6 @@ __all__ = [
'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',
'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',
'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',
'LoadPointsFromDict'
]
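# The newly exported transforms are meant for monocular 3D pipelines.
# A hedged config sketch (the shift/scale and image-scale values below are
# illustrative, not prescribed by this module):
#
#   train_pipeline = [
#       dict(type='LoadImageFromFileMono3D'),
#       dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
#       dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
#   ]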
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import numba
import numpy as np
from numba.core.errors import NumbaPerformanceWarning
from mmdet3d.core.bbox import box_np_ops
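# Compatibility note: `NumbaPerformanceWarning` moved from `numba.errors`
# to `numba.core.errors` in numba 0.49, which is what the import above
# tracks. A version-tolerant sketch (not part of this commit):
#
#   try:
#       from numba.core.errors import NumbaPerformanceWarning
#   except ImportError:  # numba < 0.49
#       from numba.errors import NumbaPerformanceWarning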
......@@ -21,8 +22,8 @@ def _rotation_box2d_jit_(corners, angle, rot_mat_T):
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = rot_sin
rot_mat_T[1, 0] = -rot_sin
rot_mat_T[1, 1] = rot_cos
corners[:] = corners @ rot_mat_T
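# Sanity check for the sign convention above (a standalone sketch): with
# rot_mat_T = [[cos, sin], [-sin, cos]], the product `points @ rot_mat_T`
# rotates row-vector points counterclockwise by `angle`:
#
#   import numpy as np
#   angle = np.pi / 2
#   rot_mat_T = np.array([[np.cos(angle), np.sin(angle)],
#                         [-np.sin(angle), np.cos(angle)]])
#   assert np.allclose(np.array([[1., 0.]]) @ rot_mat_T, [[0., 1.]])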
......@@ -34,8 +35,8 @@ def box_collision_test(boxes, qboxes, clockwise=True):
Args:
boxes (np.ndarray): Corners of current boxes.
qboxes (np.ndarray): Boxes to be avoid colliding.
clockwise (bool, optional): Whether the corners are in
clockwise order. Default: True.
"""
N = boxes.shape[0]
K = qboxes.shape[0]
......@@ -211,8 +212,8 @@ def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,
rot_sin = np.sin(current_box[0, -1])
rot_cos = np.cos(current_box[0, -1])
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = rot_sin
rot_mat_T[1, 0] = -rot_sin
rot_mat_T[1, 1] = rot_cos
current_corners[:] = current_box[
0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2]
......@@ -264,18 +265,18 @@ def _rotation_matrix_3d_(rot_mat_T, angle, axis):
rot_mat_T[:] = np.eye(3)
if axis == 1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 2] = rot_sin
rot_mat_T[2, 0] = -rot_sin
rot_mat_T[2, 2] = rot_cos
elif axis == 2 or axis == -1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = rot_sin
rot_mat_T[1, 0] = -rot_sin
rot_mat_T[1, 1] = rot_cos
elif axis == 0:
rot_mat_T[1, 1] = rot_cos
rot_mat_T[1, 2] = rot_sin
rot_mat_T[2, 1] = -rot_sin
rot_mat_T[2, 2] = rot_cos
......@@ -317,7 +318,7 @@ def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):
boxes (np.ndarray): 3D boxes to be transformed.
loc_transform (np.ndarray): Location transform to be applied.
rot_transform (np.ndarray): Rotation transform to be applied.
valid_mask (np.ndarray): Mask to indicate which boxes are valid.
"""
num_box = boxes.shape[0]
for i in range(num_box):
......@@ -338,16 +339,17 @@ def noise_per_object_v3_(gt_boxes,
Args:
gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7).
points (np.ndarray, optional): Input point cloud with
shape (M, 4). Default: None.
valid_mask (np.ndarray, optional): Mask to indicate which
boxes are valid. Default: None.
rotation_perturb (float, optional): Rotation perturbation.
Default: pi / 4.
center_noise_std (float, optional): Center noise standard deviation.
Default: 1.0.
global_random_rot_range (float, optional): Global random rotation
range. Default: pi/4.
num_try (int, optional): Number of tries. Default: 100.
"""
num_boxes = gt_boxes.shape[0]
if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
import mmcv
import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.datasets.pipelines import data_augment_utils
......@@ -15,10 +16,10 @@ class BatchSampler:
Args:
sample_list (list[dict]): List of samples.
name (str, optional): The category of samples. Default: None.
epoch (int, optional): Sampling epoch. Default: None.
shuffle (bool, optional): Whether to shuffle indices. Default: False.
drop_reminder (bool, optional): Drop reminder. Default: False.
"""
def __init__(self,
......@@ -87,9 +88,9 @@ class DataBaseSampler(object):
rate (float): Rate of actual sampled over maximum sampled number.
prepare (dict): Name of preparation functions and the input value.
sample_groups (dict): Sampled classes and numbers.
classes (list[str], optional): List of classes. Default: None.
points_loader(dict, optional): Config of points loader. Default:
dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
"""
def __init__(self,
......@@ -188,7 +189,7 @@ class DataBaseSampler(object):
db_infos[name] = filtered_infos
return db_infos
def sample_all(self, gt_bboxes, gt_labels, img=None, ground_plane=None):
"""Sampling all categories of bboxes.
Args:
......@@ -198,9 +199,9 @@ class DataBaseSampler(object):
Returns:
dict: Dict of sampled 'pseudo ground truths'.
- gt_labels_3d (np.ndarray): ground truths labels
of sampled objects.
- gt_bboxes_3d (:obj:`BaseInstance3DBoxes`):
sampled ground truth 3D bounding boxes
- points (np.ndarray): sampled points
- group_ids (np.ndarray): ids of sampled ground truths
......@@ -263,6 +264,15 @@ class DataBaseSampler(object):
gt_labels = np.array([self.cat2label[s['name']] for s in sampled],
dtype=np.long)
if ground_plane is not None:
xyz = sampled_gt_bboxes[:, :3]
dz = (ground_plane[:3][None, :] *
xyz).sum(-1) + ground_plane[3]
sampled_gt_bboxes[:, 2] -= dz
for i, s_points in enumerate(s_points_list):
s_points.tensor[:, 2].sub_(dz[i])
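# Ground-plane sketch (assuming `ground_plane` is [a, b, c, d] for the
# plane ax + by + cz + d = 0 with a unit normal): `dz` is each sampled
# box center's signed height above the plane, so subtracting it drops the
# box and its points onto the ground. E.g. plane [0, 0, 1, -1] (ground at
# z = 1) and a center at z = 1.4 give dz = 0.4, i.e. a corrected z of 1.0.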
ret = {
'gt_labels_3d':
gt_labels,
......
......@@ -24,7 +24,7 @@ class DefaultFormatBundle(object):
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
(3)to DataContainer (stack=True)
"""
......@@ -92,8 +92,8 @@ class Collect3D(object):
The "img_meta" item is always populated. The contents of the "img_meta"
dictionary depends on "meta_keys". By default this includes:
- 'img_shape': shape of the image input to the network as a tuple
(h, w, c). Note that images may be zero padded on the
bottom/right if the batch tensor is larger than this shape.
- 'scale_factor': a float indicating the preprocessing scale
- 'flip': a boolean indicating if image flip transform was used
......@@ -103,9 +103,9 @@ class Collect3D(object):
- 'lidar2img': transform from lidar to image
- 'depth2img': transform from depth to image
- 'cam2img': transform from camera to image
- 'pcd_horizontal_flip': a boolean indicating if point cloud is
flipped horizontally
- 'pcd_vertical_flip': a boolean indicating if point cloud is
flipped vertically
- 'box_mode_3d': 3D box mode
- 'box_type_3d': 3D box type
......@@ -130,15 +130,16 @@ class Collect3D(object):
'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')
"""
def __init__(
self,
keys,
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip',
'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d',
'box_type_3d', 'img_norm_cfg', 'pcd_trans', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation', 'pcd_rotation_angle',
'pts_filename', 'transformation_3d_flow', 'trans_mat',
'affine_aug')):
self.keys = keys
self.meta_keys = meta_keys
......
......@@ -14,9 +14,10 @@ class LoadMultiViewImageFromFiles(object):
Expects results['img_filename'] to be a list of filenames.
Args:
to_float32 (bool, optional): Whether to convert the img to float32.
Defaults to False.
color_type (str, optional): Color type of the file.
Defaults to 'unchanged'.
"""
def __init__(self, to_float32=False, color_type='unchanged'):
......@@ -30,7 +31,7 @@ class LoadMultiViewImageFromFiles(object):
results (dict): Result dict containing multi-view image filenames.
Returns:
dict: The result dict containing the multi-view image data.
Added keys and values are described below.
- filename (str): Multi-view image filenames.
......@@ -48,7 +49,7 @@ class LoadMultiViewImageFromFiles(object):
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
# unravel to list, see `DefaultFormatBundle` in formatting.py
# which will transpose each image separately and then stack into array
results['img'] = [img[..., i] for i in range(img.shape[-1])]
results['img_shape'] = img.shape
......@@ -77,7 +78,7 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
detection, additional camera parameters need to be loaded.
Args:
kwargs (dict): Arguments are the same as those in
:class:`LoadImageFromFile`.
"""
......@@ -102,17 +103,20 @@ class LoadPointsFromMultiSweeps(object):
This is usually used for nuScenes dataset to utilize previous sweeps.
Args:
sweeps_num (int, optional): Number of sweeps. Defaults to 10.
load_dim (int, optional): Dimension number of the loaded points.
Defaults to 5.
use_dim (list[int], optional): Which dimension to use.
Defaults to [0, 1, 2, 4].
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
pad_empty_sweeps (bool, optional): Whether to repeat keyframe when
sweeps is empty. Defaults to False.
remove_close (bool, optional): Whether to remove close points.
Defaults to False.
test_mode (bool, optional): If `test_mode=True`, it will not
randomly sample sweeps but select the nearest N frames.
Defaults to False.
"""
......@@ -161,7 +165,7 @@ class LoadPointsFromMultiSweeps(object):
Args:
points (np.ndarray | :obj:`BasePoints`): Sweep points.
radius (float, optional): Radius below which points are removed.
Defaults to 1.0.
Returns:
......@@ -182,14 +186,14 @@ class LoadPointsFromMultiSweeps(object):
"""Call function to load multi-sweep point clouds from files.
Args:
results (dict): Result dict containing multi-sweep point cloud
filenames.
Returns:
dict: The result dict containing the multi-sweep points data.
Added key and value are described below.
- points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
cloud arrays.
"""
points = results['points']
......@@ -243,8 +247,8 @@ class PointSegClassMapping(object):
Args:
valid_cat_ids (tuple[int]): A tuple of valid category.
max_cat_id (int, optional): The max possible cat_id in input
segmentation mask. Defaults to 40.
"""
def __init__(self, valid_cat_ids, max_cat_id=40):
......@@ -268,7 +272,7 @@ class PointSegClassMapping(object):
results (dict): Result dict containing point semantic masks.
Returns:
dict: The result dict containing the mapped category ids.
Updated key and value are described below.
- pts_semantic_mask (np.ndarray): Mapped semantic masks.
......@@ -307,7 +311,7 @@ class NormalizePointsColor(object):
results (dict): Result dict containing point clouds data.
Returns:
dict: The result dict containing the normalized points.
Updated key and value are described below.
- points (:obj:`BasePoints`): Points after color normalization.
......@@ -334,7 +338,7 @@ class NormalizePointsColor(object):
class LoadPointsFromFile(object):
"""Load Points From File.
Load points from file.
Args:
coord_type (str): The type of coordinates of points cloud.
......@@ -342,14 +346,17 @@ class LoadPointsFromFile(object):
- 'LIDAR': Points in LiDAR coordinates.
- 'DEPTH': Points in depth coordinates, usually for indoor dataset.
- 'CAMERA': Points in camera coordinates.
load_dim (int, optional): The dimension of the loaded points.
Defaults to 6.
use_dim (list[int], optional): Which dimensions of the points to use.
Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension.
shift_height (bool, optional): Whether to use shifted height.
Defaults to False.
use_color (bool, optional): Whether to use color features.
Defaults to False.
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
"""
......@@ -405,7 +412,7 @@ class LoadPointsFromFile(object):
results (dict): Result dict containing point clouds data.
Returns:
dict: The result dict containing the point clouds data.
Added key and value are described below.
- points (:obj:`BasePoints`): Point clouds data.
......@@ -453,6 +460,15 @@ class LoadPointsFromFile(object):
return repr_str
@PIPELINES.register_module()
class LoadPointsFromDict(LoadPointsFromFile):
"""Load Points From Dict."""
def __call__(self, results):
assert 'points' in results
return results
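# LoadPointsFromDict performs no file I/O: the points must already sit in
# the input dict. A minimal sketch (assuming `points` was prepared by the
# caller, e.g. for inference on in-memory data):
#
#   loader = LoadPointsFromDict(coord_type='LIDAR', load_dim=4, use_dim=4)
#   results = loader(dict(points=points))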
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from copy import deepcopy
import mmcv
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import Compose
......@@ -16,18 +17,19 @@ class MultiScaleFlipAug3D(object):
img_scale (tuple | list[tuple]): Images scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for
resizing.
flip (bool, optional): Whether to apply flip augmentation.
Defaults to False.
flip_direction (str | list[str], optional): Flip augmentation
directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``.
Defaults to "horizontal".
pcd_horizontal_flip (bool, optional): Whether to apply horizontal
flip augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
pcd_vertical_flip (bool, optional): Whether to apply vertical flip
augmentation to point cloud. Defaults to True.
Note that it works only when 'flip' is turned on.
"""
def __init__(self,
......@@ -70,7 +72,7 @@ class MultiScaleFlipAug3D(object):
results (dict): Result dict contains the data to augment.
Returns:
dict: The result dict contains the data that is augmented with
different scales and flips.
"""
aug_data = []
......
# Copyright (c) OpenMMLab. All rights reserved.
import random
import warnings
import cv2
import numpy as np
from mmcv import is_tuple_of
from mmcv.utils import build_from_cfg
......@@ -22,7 +25,7 @@ class RandomDropPointsColor(object):
util/transform.py#L223>`_ for more details.
Args:
drop_ratio (float, optional): The probability of dropping point colors.
Defaults to 0.2.
"""
......@@ -38,7 +41,7 @@ class RandomDropPointsColor(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after color dropping,
'points' key is updated in the result dict.
"""
points = input_dict['points']
......@@ -105,10 +108,11 @@ class RandomFlip3D(RandomFlip):
Args:
input_dict (dict): Result dict from loading pipeline.
direction (str, optional): Flip direction.
Default: 'horizontal'.
Returns:
dict: Flipped results, 'points', 'bbox3d_fields' keys are
updated in the result dict.
"""
assert direction in ['horizontal', 'vertical']
......@@ -141,15 +145,15 @@ class RandomFlip3D(RandomFlip):
input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]
def __call__(self, input_dict):
"""Call function to flip points, values in the ``bbox3d_fields`` and \
"""Call function to flip points, values in the ``bbox3d_fields`` and
also flip 2D image and its annotations.
Args:
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Flipped results, 'flip', 'flip_direction',
'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added
into result dict.
"""
# flip 2D image and its annotations
......@@ -191,20 +195,20 @@ class RandomFlip3D(RandomFlip):
class RandomJitterPoints(object):
"""Randomly jitter point coordinates.
Different from the global translation in ``GlobalRotScaleTrans``, here we
apply different noises to each point in a scene.
Args:
jitter_std (list[float]): The standard deviation of jittering noise.
This applies random noise to all points in a 3D scene, which is
sampled from a gaussian distribution whose standard deviation is
set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01]
clip_range (list[float]): Clip the randomly generated jitter
noise into this range. If None is given, don't perform clipping.
Defaults to [-0.05, 0.05]
Note:
This transform should only be used in point cloud segmentation tasks
because we don't transform ground-truth bboxes accordingly.
For similar transform in detection task, please refer to `ObjectNoise`.
"""
......@@ -233,7 +237,7 @@ class RandomJitterPoints(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after adding noise to each point,
'points' key is updated in the result dict.
"""
points = input_dict['points']
......@@ -264,14 +268,17 @@ class ObjectSample(object):
sample_2d (bool): Whether to also paste 2D image patch to the images
This should be true when applying multi-modality cut-and-paste.
Defaults to False.
use_ground_plane (bool): Whether to use the ground plane to adjust the
3D labels. Defaults to False.
"""
def __init__(self, db_sampler, sample_2d=False, use_ground_plane=False):
self.sampler_cfg = db_sampler
self.sample_2d = sample_2d
if 'type' not in db_sampler.keys():
db_sampler['type'] = 'DataBaseSampler'
self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)
self.use_ground_plane = use_ground_plane
@staticmethod
def remove_points_in_boxes(points, boxes):
......@@ -295,13 +302,18 @@ class ObjectSample(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after object sampling augmentation,
'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated
in the result dict.
"""
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
if self.use_ground_plane and 'plane' in input_dict['ann_info']:
ground_plane = input_dict['ann_info']['plane']
input_dict['plane'] = ground_plane
else:
ground_plane = None
# change to float for blending operation
points = input_dict['points']
if self.sample_2d:
......@@ -315,7 +327,10 @@ class ObjectSample(object):
img=img)
else:
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d.tensor.numpy(),
gt_labels_3d,
img=None,
ground_plane=ground_plane)
if sampled_dict is not None:
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
......@@ -392,13 +407,13 @@ class ObjectNoise(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after adding noise to each object,
'points', 'gt_bboxes_3d' keys are updated in the result dict.
"""
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
# TODO: this is an in-place operation
numpy_box = gt_bboxes_3d.tensor.numpy()
numpy_points = points.tensor.numpy()
......@@ -432,10 +447,10 @@ class GlobalAlignment(object):
rotation_axis (int): Rotation axis for points and bboxes rotation.
Note:
We do not record the applied rotation and translation as in
GlobalRotScaleTrans. Because usually, we do not need to reverse
the alignment step.
For example, ScanNet 3D detection task uses aligned ground-truth
bounding boxes for evaluation.
"""
......@@ -487,7 +502,7 @@ class GlobalAlignment(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after global alignment, 'points' and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
assert 'axis_align_matrix' in input_dict['ann_info'].keys(), \
......@@ -516,15 +531,15 @@ class GlobalRotScaleTrans(object):
"""Apply global rotation, scaling and translation to a 3D scene.
Args:
rot_range (list[float], optional): Range of rotation angle.
Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
scale_ratio_range (list[float], optional): Range of scale ratio.
Defaults to [0.95, 1.05].
translation_std (list[float], optional): The standard deviation of
translation noise applied to a scene, which
is sampled from a gaussian distribution whose standard deviation
is set by ``translation_std``. Defaults to [0, 0, 0]
shift_height (bool, optional): Whether to shift height.
(the fourth dimension of indoor points) when scaling.
Defaults to False.
"""
......@@ -563,8 +578,8 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after translation, 'points', 'pcd_trans'
and keys in input_dict['bbox3d_fields'] are updated
in the result dict.
"""
translation_std = np.array(self.translation_std, dtype=np.float32)
......@@ -582,8 +597,8 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after rotation, 'points', 'pcd_rotation'
and keys in input_dict['bbox3d_fields'] are updated
in the result dict.
"""
rotation = self.rot_range
......@@ -593,6 +608,7 @@ class GlobalRotScaleTrans(object):
if len(input_dict['bbox3d_fields']) == 0:
rot_mat_T = input_dict['points'].rotate(noise_rotation)
input_dict['pcd_rotation'] = rot_mat_T
input_dict['pcd_rotation_angle'] = noise_rotation
return
# rotate points with bboxes
......@@ -602,6 +618,7 @@ class GlobalRotScaleTrans(object):
noise_rotation, input_dict['points'])
input_dict['points'] = points
input_dict['pcd_rotation'] = rot_mat_T
input_dict['pcd_rotation_angle'] = noise_rotation
def _scale_bbox_points(self, input_dict):
"""Private function to scale bounding boxes and points.
......@@ -610,7 +627,7 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after scaling, 'points' and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
scale = input_dict['pcd_scale_factor']
......@@ -632,7 +649,7 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after scaling, 'pcd_scale_factor' are updated
in the result dict.
"""
scale_factor = np.random.uniform(self.scale_ratio_range[0],
......@@ -640,7 +657,7 @@ class GlobalRotScaleTrans(object):
input_dict['pcd_scale_factor'] = scale_factor
def __call__(self, input_dict):
"""Private function to rotate, scale and translate bounding boxes and \
"""Private function to rotate, scale and translate bounding boxes and
points.
Args:
......@@ -648,7 +665,7 @@ class GlobalRotScaleTrans(object):
Returns:
dict: Results after scaling, 'points', 'pcd_rotation',
'pcd_scale_factor', 'pcd_trans' and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
if 'transformation_3d_flow' not in input_dict:
......@@ -686,7 +703,7 @@ class PointShuffle(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
idx = input_dict['points'].shuffle()
......@@ -725,7 +742,7 @@ class ObjectRangeFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
keys are updated in the result dict.
"""
# Check points instance type and initialise bev_range
......@@ -777,7 +794,7 @@ class PointsRangeFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points = input_dict['points']
......@@ -823,7 +840,7 @@ class ObjectNameFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
keys are updated in the result dict.
"""
gt_labels_3d = input_dict['gt_labels_3d']
......@@ -891,8 +908,8 @@ class PointSample(object):
if sample_range is not None and not replace:
# Only sampling the near points when len(points) >= num_samples
depth = np.linalg.norm(points.tensor, axis=1)
far_inds = np.where(depth >= sample_range)[0]
near_inds = np.where(depth < sample_range)[0]
# in case there are too many far points
if len(far_inds) > num_samples:
far_inds = np.random.choice(
......@@ -915,7 +932,7 @@ class PointSample(object):
Args:
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask' \
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points = results['points']
......@@ -996,10 +1013,10 @@ class IndoorPatchPointSample(object):
additional features. Defaults to False.
num_try (int, optional): Number of times to try if the patch selected
is invalid. Defaults to 10.
enlarge_size (float, optional): Enlarge the sampled patch to
[-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size] as
an augmentation. If None, set it as 0. Defaults to 0.2.
min_unique_num (int, optional): Minimum number of unique points
the sampled patch should contain. If None, use PointNet++'s method
to judge uniqueness. Defaults to None.
eps (float, optional): A value added to patch boundary to guarantee
......@@ -1040,7 +1057,7 @@ class IndoorPatchPointSample(object):
attribute_dims, point_type):
"""Generating model input.
Generate input by subtracting patch center and adding additional
features. Currently support colors and normalized xyz as features.
Args:
......@@ -1184,7 +1201,7 @@ class IndoorPatchPointSample(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points = results['points']
......@@ -1244,7 +1261,7 @@ class BackgroundPointsFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points = input_dict['points']
......@@ -1342,7 +1359,7 @@ class VoxelBasedPointSampler(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points = results['points']
......@@ -1423,3 +1440,258 @@ class VoxelBasedPointSampler(object):
repr_str += ' ' * indent + 'prev_voxel_generator=\n'
repr_str += f'{_auto_indent(repr(self.prev_voxel_generator), 8)})'
return repr_str
@PIPELINES.register_module()
class AffineResize(object):
"""Get the affine transform matrices to the target size.
Different from :class:`RandomAffine` in MMDetection, this class can
calculate the affine transform matrices while resizing the input image
to a fixed size. The affine transform matrices include: 1) matrix
transforming original image to the network input image size. 2) matrix
transforming original image to the network output feature map size.
Args:
img_scale (tuple): Images scales for resizing.
down_ratio (int): The down ratio of feature map.
It should be >= 1.
bbox_clip_border (bool, optional): Whether clip the objects
outside the border of the image. Defaults to True.
"""
def __init__(self, img_scale, down_ratio, bbox_clip_border=True):
self.img_scale = img_scale
self.down_ratio = down_ratio
self.bbox_clip_border = bbox_clip_border
def __call__(self, results):
"""Call function to do affine transform to input image and labels.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Results after affine resize, 'affine_aug', 'trans_mat'
keys are added in the result dict.
"""
# The results have gone through RandomShiftScale before AffineResize
if 'center' not in results:
img = results['img']
height, width = img.shape[:2]
center = np.array([width / 2, height / 2], dtype=np.float32)
size = np.array([width, height], dtype=np.float32)
results['affine_aug'] = False
else:
# The results did not go through RandomShiftScale before
# AffineResize
img = results['img']
center = results['center']
size = results['size']
trans_affine = self._get_transform_matrix(center, size, self.img_scale)
img = cv2.warpAffine(img, trans_affine[:2, :], self.img_scale)
if isinstance(self.down_ratio, tuple):
trans_mat = [
self._get_transform_matrix(
center, size,
(self.img_scale[0] // ratio, self.img_scale[1] // ratio))
for ratio in self.down_ratio
] # (3, 3)
else:
trans_mat = self._get_transform_matrix(
center, size, (self.img_scale[0] // self.down_ratio,
self.img_scale[1] // self.down_ratio))
results['img'] = img
results['img_shape'] = img.shape
results['pad_shape'] = img.shape
results['trans_mat'] = trans_mat
self._affine_bboxes(results, trans_affine)
if 'centers2d' in results:
centers2d = self._affine_transform(results['centers2d'],
trans_affine)
valid_index = (centers2d[:, 0] >
0) & (centers2d[:, 0] <
self.img_scale[0]) & (centers2d[:, 1] > 0) & (
centers2d[:, 1] < self.img_scale[1])
results['centers2d'] = centers2d[valid_index]
for key in results.get('bbox_fields', []):
if key in ['gt_bboxes']:
results[key] = results[key][valid_index]
if 'gt_labels' in results:
results['gt_labels'] = results['gt_labels'][
valid_index]
if 'gt_masks' in results:
raise NotImplementedError(
'AffineResize only supports bbox.')
for key in results.get('bbox3d_fields', []):
if key in ['gt_bboxes_3d']:
results[key].tensor = results[key].tensor[valid_index]
if 'gt_labels_3d' in results:
results['gt_labels_3d'] = results['gt_labels_3d'][
valid_index]
results['depths'] = results['depths'][valid_index]
return results
def _affine_bboxes(self, results, matrix):
"""Affine transform bboxes to input image.
Args:
results (dict): Result dict from loading pipeline.
matrix (np.ndarray): Matrix transforming original
image to the network input image size.
shape: (3, 3)
"""
for key in results.get('bbox_fields', []):
bboxes = results[key]
bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
if self.bbox_clip_border:
bboxes[:,
[0, 2]] = bboxes[:,
[0, 2]].clip(0, self.img_scale[0] - 1)
bboxes[:,
[1, 3]] = bboxes[:,
[1, 3]].clip(0, self.img_scale[1] - 1)
results[key] = bboxes
def _affine_transform(self, points, matrix):
"""Affine transform bbox points to input image.
Args:
points (np.ndarray): Points to be transformed.
shape: (N, 2)
matrix (np.ndarray): Affine transform matrix.
shape: (3, 3)
Returns:
np.ndarray: Transformed points.
"""
num_points = points.shape[0]
hom_points_2d = np.concatenate((points, np.ones((num_points, 1))),
axis=1)
hom_points_2d = hom_points_2d.T
affined_points = np.matmul(matrix, hom_points_2d).T
return affined_points[:, :2]
def _get_transform_matrix(self, center, scale, output_scale):
"""Get affine transform matrix.
Args:
center (tuple): Center of current image.
scale (tuple): Scale of current image.
output_scale (tuple[float]): The transform target image scales.
Returns:
np.ndarray: Affine transform matrix.
"""
# TODO: further add rot and shift here.
src_w = scale[0]
dst_w = output_scale[0]
dst_h = output_scale[1]
src_dir = np.array([0, src_w * -0.5])
dst_dir = np.array([0, dst_w * -0.5])
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center
src[1, :] = center + src_dir
dst[0, :] = np.array([dst_w * 0.5, dst_h * 0.5])
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
src[2, :] = self._get_ref_point(src[0, :], src[1, :])
dst[2, :] = self._get_ref_point(dst[0, :], dst[1, :])
get_matrix = cv2.getAffineTransform(src, dst)
matrix = np.concatenate((get_matrix, [[0., 0., 1.]]))
return matrix.astype(np.float32)
def _get_ref_point(self, ref_point1, ref_point2):
"""Get reference point to calculate affine transform matrix.
        While using OpenCV to calculate the affine matrix, we need at least
        three pairs of corresponding points on the original and target
        images. Here we use two points to derive the third reference point.
"""
d = ref_point1 - ref_point2
ref_point3 = ref_point2 + np.array([-d[1], d[0]])
return ref_point3
def __repr__(self):
repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'down_ratio={self.down_ratio}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
return repr_str
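For intuition, here is a minimal standalone sketch of the three-point construction behind `_get_transform_matrix` and `_get_ref_point` (all concrete values are invented for illustration):

```python
import cv2
import numpy as np

def ref_point(p1, p2):
    # Third correspondence: rotate (p1 - p2) by 90 degrees around p2.
    d = p1 - p2
    return p2 + np.array([-d[1], d[0]])

center = np.array([640., 360.])          # center of a 1280x720 source
src_dir = np.array([0., 1280. * -0.5])
dst_w, dst_h = 512., 512.                # assumed network input size
dst_c = np.array([dst_w * 0.5, dst_h * 0.5])
dst_dir = np.array([0., dst_w * -0.5])

src = np.stack([center, center + src_dir,
                ref_point(center, center + src_dir)]).astype(np.float32)
dst = np.stack([dst_c, dst_c + dst_dir,
                ref_point(dst_c, dst_c + dst_dir)]).astype(np.float32)

matrix = cv2.getAffineTransform(src, dst)          # (2, 3)
# The source center lands exactly on the target center.
assert np.allclose(matrix @ np.array([*center, 1.]), dst_c)
```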
@PIPELINES.register_module()
class RandomShiftScale(object):
"""Random shift scale.
    Different from the usual shift and scale functions, it does not shift
    or scale the image directly; instead, it records the shift and scale
    information in the results dict. It is designed to be used together
    with AffineResize (see the config sketch after this class).
Args:
shift_scale (tuple[float]): Shift and scale range.
aug_prob (float): The shifting and scaling probability.
"""
def __init__(self, shift_scale, aug_prob):
self.shift_scale = shift_scale
self.aug_prob = aug_prob
def __call__(self, results):
"""Call function to record random shift and scale infos.
Args:
results (dict): Result dict from loading pipeline.
Returns:
            dict: Results after random shift and scale; 'center', 'size'
                and 'affine_aug' keys are added to the result dict.
"""
img = results['img']
height, width = img.shape[:2]
center = np.array([width / 2, height / 2], dtype=np.float32)
size = np.array([width, height], dtype=np.float32)
if random.random() < self.aug_prob:
shift, scale = self.shift_scale[0], self.shift_scale[1]
shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
center[0] += size[0] * random.choice(shift_ranges)
center[1] += size[1] * random.choice(shift_ranges)
scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
size *= random.choice(scale_ranges)
results['affine_aug'] = True
else:
results['affine_aug'] = False
results['center'] = center
results['size'] = size
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(shift_scale={self.shift_scale}, '
repr_str += f'aug_prob={self.aug_prob}) '
return repr_str
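As flagged in the docstring, the two transforms are meant to be chained. A hedged config sketch (the loading transform and the concrete scales are illustrative assumptions, not part of this diff):

```python
# Illustrative pipeline fragment: RandomShiftScale only records 'center',
# 'size' and 'affine_aug'; AffineResize then consumes them to warp the
# image and all 2D/3D annotation fields consistently.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
    dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
]
```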
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from os import path as osp
import numpy as np
from mmdet3d.core import show_seg_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
......
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import tempfile
import warnings
from os import path as osp
import numpy as np
from mmdet3d.core import show_result, show_seg_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
......@@ -78,13 +79,13 @@ class ScanNetDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data \
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- file_name (str): Filename of point clouds.
- img_prefix (str | None, optional): Prefix of image files.
- img_prefix (str, optional): Prefix of image files.
- img_info (dict, optional): Image info.
- ann_info (dict): Annotation info.
"""
......@@ -129,12 +130,12 @@ class ScanNetDataset(Custom3DDataset):
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
- axis_align_matrix (np.ndarray): Transformation matrix for \
- axis_align_matrix (np.ndarray): Transformation matrix for
global scene alignment.
"""
# Use index to get the annos, thus the evalhook could also use this api
......@@ -172,7 +173,7 @@ class ScanNetDataset(Custom3DDataset):
def prepare_test_data(self, index):
"""Prepare data for testing.
We should take axis_align_matrix from self.data_infos since we need \
We should take axis_align_matrix from self.data_infos since we need
to align point clouds.
Args:
......@@ -272,7 +273,7 @@ class ScanNetSegDataset(Custom3DSegDataset):
as input. Defaults to None.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. \
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES).
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
......@@ -424,7 +425,7 @@ class ScanNetSegDataset(Custom3DSegDataset):
Args:
outputs (list[dict]): Testing results of the dataset.
txtfile_prefix (str | None): The prefix of saved files. It includes
txtfile_prefix (str): The prefix of saved files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
......
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from collections import OrderedDict
from os import path as osp
import numpy as np
from mmdet3d.core import show_multi_modality_result, show_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.core import eval_map
......@@ -74,13 +75,13 @@ class SUNRGBDDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data \
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str, optional): Filename of point clouds.
- file_name (str, optional): Filename of point clouds.
- img_prefix (str | None, optional): Prefix of image files.
- img_prefix (str, optional): Prefix of image files.
- img_info (dict, optional): Image info.
- calib (dict, optional): Camera calibration info.
- ann_info (dict): Annotation info.
......@@ -125,7 +126,7 @@ class SUNRGBDDataset(Custom3DDataset):
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
......@@ -239,12 +240,15 @@ class SUNRGBDDataset(Custom3DDataset):
Args:
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
iou_thr_2d (list[float]): AP IoU thresholds for 2d evaluation.
show (bool): Whether to visualize.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
iou_thr (list[float], optional): AP IoU thresholds for 3D
evaluation. Default: (0.25, 0.5).
iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
evaluation. Default: (0.5, ).
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
......
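A minimal sketch of driving this indoor evaluate API (the `dataset` object and `results` list are assumed to come from a finished test run; the printed keys are illustrative):

```python
# Hypothetical call on an already-built SUNRGBDDataset instance.
metrics = dataset.evaluate(
    results,
    iou_thr=(0.25, 0.5),   # 3D AP thresholds, matching the defaults above
    iou_thr_2d=(0.5, ))
print(metrics)  # e.g. {'bed_AP_0.25': ..., 'mAP_0.25': ..., ...}
```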
......@@ -12,7 +12,7 @@ from mmdet3d.datasets.pipelines import (Collect3D, DefaultFormatBundle3D,
PointSegClassMapping)
# yapf: enable
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadImageFromFile
from mmdet.datasets.pipelines import LoadImageFromFile, MultiScaleFlipAug
def is_loading_function(transform):
......@@ -25,7 +25,7 @@ def is_loading_function(transform):
transform (dict | :obj:`Pipeline`): A transform config or a function.
Returns:
bool | None: Whether it is a loading function. None means can't judge.
bool: Whether it is a loading function. None means can't judge.
When transform is `MultiScaleFlipAug3D`, we return None.
"""
# TODO: use more elegant way to distinguish loading modules
......@@ -40,12 +40,12 @@ def is_loading_function(transform):
return False
if obj_cls in loading_functions:
return True
if obj_cls in (MultiScaleFlipAug3D, ):
if obj_cls in (MultiScaleFlipAug3D, MultiScaleFlipAug):
return None
elif callable(transform):
if isinstance(transform, loading_functions):
return True
if isinstance(transform, MultiScaleFlipAug3D):
if isinstance(transform, (MultiScaleFlipAug3D, MultiScaleFlipAug)):
return None
return False
......@@ -92,7 +92,7 @@ def get_loading_pipeline(pipeline):
... dict(type='Collect3D',
... keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
... ]
>>> assert expected_pipelines ==\
>>> assert expected_pipelines == \
... get_loading_pipeline(pipelines)
"""
loading_pipeline = []
......@@ -126,7 +126,7 @@ def extract_result_dict(results, key):
key (str): Key of the desired data.
Returns:
np.ndarray | torch.Tensor | None: Data term.
np.ndarray | torch.Tensor: Data term.
"""
if key not in results.keys():
return None
......
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
import os
import tempfile
from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.utils import print_log
from os import path as osp
from mmdet.datasets import DATASETS
from ..core.bbox import Box3DMode, points_cam2img
......@@ -46,8 +47,9 @@ class WaymoDataset(KittiDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list): The range of point cloud used to filter
invalid predicted boxes. Default: [-85, -85, -5, 85, 85, 5].
        pcd_limit_range (list[float], optional): The range of point cloud
            used to filter invalid predicted boxes.
            Default: [-85, -85, -5, 85, 85, 5].
"""
CLASSES = ('Car', 'Cyclist', 'Pedestrian')
......@@ -100,7 +102,7 @@ class WaymoDataset(KittiDataset):
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- img_prefix (str | None): prefix of image files
- img_prefix (str): prefix of image files
- img_info (dict): image info
- lidar2img (list[np.ndarray], optional): transformations from
lidar to different cameras
......@@ -140,15 +142,15 @@ class WaymoDataset(KittiDataset):
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str | None): The prefix of pkl files. It includes
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str | None): The prefix of submitted files. It
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
data_format (str | None): Output data format. Default: 'waymo'.
Another supported choice is 'kitti'.
data_format (str, optional): Output data format.
Default: 'waymo'. Another supported choice is 'kitti'.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
......@@ -226,18 +228,18 @@ class WaymoDataset(KittiDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'waymo'. Another supported metric is 'kitti'.
logger (logging.Logger | str | None): Logger used for printing
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str | None): The prefix of pkl files. It includes
pklfile_prefix (str, optional): The prefix of pkl files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str | None): The prefix of submission datas.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool): Whether to visualize.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
......@@ -349,8 +351,8 @@ class WaymoDataset(KittiDataset):
if tmp_dir is not None:
tmp_dir.cleanup()
if show:
self.show(results, out_dir, pipeline=pipeline)
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return ap_dict
def bbox2result_kitti(self,
......@@ -364,8 +366,8 @@ class WaymoDataset(KittiDataset):
            net_outputs (List[np.ndarray]): List of arrays storing the
                bboxes and scores.
            class_names (List[str]): A list of class names.
pklfile_prefix (str | None): The prefix of pkl file.
submission_prefix (str | None): The prefix of submission file.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
            List[dict]: A list of dicts in the KITTI 3D format.
......@@ -494,7 +496,6 @@ class WaymoDataset(KittiDataset):
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
# TODO: remove the hack of yaw
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if len(box_preds) == 0:
......
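For context, a hedged sketch of the corresponding Waymo evaluation call (assumes a built WaymoDataset and, for metric='waymo', an installed waymo-open-dataset toolkit):

```python
# 'kitti' evaluation needs no extra tooling; 'waymo' first converts the
# predictions to the Waymo submission format under pklfile_prefix.
ap_dict = dataset.evaluate(
    results,
    metric='waymo',                         # or 'kitti'
    pklfile_prefix='work_dir/results/pred',
    submission_prefix=None)                 # skip submission files
```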
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
from .dgcnn import DGCNNBackbone
from .dla import DLANet
from .multi_backbone import MultiBackbone
from .nostem_regnet import NoStemRegNet
from .pointnet2_sa_msg import PointNet2SAMSG
......@@ -8,5 +10,6 @@ from .second import SECOND
__all__ = [
'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',
'SECOND', 'PointNet2SASSG', 'PointNet2SAMSG', 'MultiBackbone'
'SECOND', 'DGCNNBackbone', 'PointNet2SASSG', 'PointNet2SAMSG',
'MultiBackbone', 'DLANet'
]
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import ABCMeta
from mmcv.runner import BaseModule
......
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import BaseModule, auto_fp16
from torch import nn as nn
from mmdet3d.ops import DGCNNFAModule, DGCNNGFModule
from mmdet.models import BACKBONES
@BACKBONES.register_module()
class DGCNNBackbone(BaseModule):
"""Backbone network for DGCNN.
Args:
in_channels (int): Input channels of point cloud.
num_samples (tuple[int], optional): The number of samples for knn or
ball query in each graph feature (GF) module.
Defaults to (20, 20, 20).
knn_modes (tuple[str], optional): Mode of KNN of each knn module.
Defaults to ('D-KNN', 'F-KNN', 'F-KNN').
radius (tuple[float], optional): Sampling radii of each GF module.
Defaults to (None, None, None).
gf_channels (tuple[tuple[int]], optional): Out channels of each mlp in
GF module. Defaults to ((64, 64), (64, 64), (64, )).
fa_channels (tuple[int], optional): Out channels of each mlp in FA
module. Defaults to (1024, ).
act_cfg (dict, optional): Config of activation layer.
Defaults to dict(type='ReLU').
init_cfg (dict, optional): Initialization config.
Defaults to None.
"""
def __init__(self,
in_channels,
num_samples=(20, 20, 20),
knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),
radius=(None, None, None),
gf_channels=((64, 64), (64, 64), (64, )),
fa_channels=(1024, ),
act_cfg=dict(type='ReLU'),
init_cfg=None):
super().__init__(init_cfg=init_cfg)
self.num_gf = len(gf_channels)
        assert len(num_samples) == len(knn_modes) == len(radius) == len(
            gf_channels), ('num_samples, knn_modes, radius and gf_channels '
                           'should have the same length.')
self.GF_modules = nn.ModuleList()
gf_in_channel = in_channels * 2
skip_channel_list = [gf_in_channel] # input channel list
for gf_index in range(self.num_gf):
cur_gf_mlps = list(gf_channels[gf_index])
cur_gf_mlps = [gf_in_channel] + cur_gf_mlps
gf_out_channel = cur_gf_mlps[-1]
self.GF_modules.append(
DGCNNGFModule(
mlp_channels=cur_gf_mlps,
num_sample=num_samples[gf_index],
knn_mode=knn_modes[gf_index],
radius=radius[gf_index],
act_cfg=act_cfg))
skip_channel_list.append(gf_out_channel)
gf_in_channel = gf_out_channel * 2
fa_in_channel = sum(skip_channel_list[1:])
cur_fa_mlps = list(fa_channels)
cur_fa_mlps = [fa_in_channel] + cur_fa_mlps
self.FA_module = DGCNNFAModule(
mlp_channels=cur_fa_mlps, act_cfg=act_cfg)
@auto_fp16(apply_to=('points', ))
def forward(self, points):
"""Forward pass.
Args:
points (torch.Tensor): point coordinates with features,
with shape (B, N, in_channels).
Returns:
dict[str, list[torch.Tensor]]: Outputs after graph feature (GF) and
feature aggregation (FA) modules.
- gf_points (list[torch.Tensor]): Outputs after each GF module.
- fa_points (torch.Tensor): Outputs after FA module.
"""
gf_points = [points]
for i in range(self.num_gf):
cur_points = self.GF_modules[i](gf_points[i])
gf_points.append(cur_points)
fa_points = self.FA_module(gf_points)
out = dict(gf_points=gf_points, fa_points=fa_points)
return out
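A hedged usage sketch (assumes a mmdet3d build with the DGCNN ops compiled; the batch and point counts are illustrative):

```python
import torch

from mmdet3d.models.backbones import DGCNNBackbone

model = DGCNNBackbone(in_channels=6)   # e.g. XYZ + RGB per point
points = torch.rand(2, 1024, 6)        # (B, N, in_channels)
out = model(points)
# 'gf_points' holds the input plus one tensor per GF module;
# 'fa_points' is the aggregated output of the FA module.
print(len(out['gf_points']), out['fa_points'].shape)
```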
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import torch
from mmcv.cnn import build_conv_layer, build_norm_layer
from mmcv.runner import BaseModule
from torch import nn
from mmdet.models.builder import BACKBONES
def dla_build_norm_layer(cfg, num_features):
"""Build normalization layer specially designed for DLANet.
Args:
cfg (dict): The norm layer config, which should contain:
- type (str): Layer type.
- layer args: Args needed to instantiate a norm layer.
- requires_grad (bool, optional): Whether stop gradient updates.
num_features (int): Number of input channels.
Returns:
        tuple[str, nn.Module]: The (name, layer) pair built by mmcv's
            ``build_norm_layer``.
"""
cfg_ = cfg.copy()
if cfg_['type'] == 'GN':
if num_features % 32 == 0:
return build_norm_layer(cfg_, num_features)
else:
assert 'num_groups' in cfg_
cfg_['num_groups'] = cfg_['num_groups'] // 2
return build_norm_layer(cfg_, num_features)
else:
return build_norm_layer(cfg_, num_features)
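To make the GN fallback concrete, a small hedged check (assumes mmcv is installed; 48 is an arbitrary channel count not divisible by 32):

```python
# If channels divide 32, the GN config is used as-is; otherwise
# num_groups is halved so it still divides the channel count.
name, layer = dla_build_norm_layer(dict(type='GN', num_groups=32), 64)
assert layer.num_groups == 32
name, layer = dla_build_norm_layer(dict(type='GN', num_groups=32), 48)
assert layer.num_groups == 16   # 48 % 32 != 0, but 48 % 16 == 0
```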
class BasicBlock(BaseModule):
"""BasicBlock in DLANet.
Args:
in_channels (int): Input feature channel.
out_channels (int): Output feature channel.
norm_cfg (dict): Dictionary to construct and config
norm layer.
conv_cfg (dict): Dictionary to construct and config
conv layer.
stride (int, optional): Conv stride. Default: 1.
dilation (int, optional): Conv dilation. Default: 1.
init_cfg (dict, optional): Initialization config.
Default: None.
"""
def __init__(self,
in_channels,
out_channels,
norm_cfg,
conv_cfg,
stride=1,
dilation=1,
init_cfg=None):
super(BasicBlock, self).__init__(init_cfg)
self.conv1 = build_conv_layer(
conv_cfg,
in_channels,
out_channels,
3,
stride=stride,
padding=dilation,
dilation=dilation,
bias=False)
self.norm1 = dla_build_norm_layer(norm_cfg, out_channels)[1]
self.relu = nn.ReLU(inplace=True)
self.conv2 = build_conv_layer(
conv_cfg,
out_channels,
out_channels,
3,
stride=1,
padding=dilation,
dilation=dilation,
bias=False)
self.norm2 = dla_build_norm_layer(norm_cfg, out_channels)[1]
self.stride = stride
def forward(self, x, identity=None):
"""Forward function."""
if identity is None:
identity = x
out = self.conv1(x)
out = self.norm1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.norm2(out)
out += identity
out = self.relu(out)
return out
class Root(BaseModule):
"""Root in DLANet.
Args:
in_channels (int): Input feature channel.
out_channels (int): Output feature channel.
norm_cfg (dict): Dictionary to construct and config
norm layer.
conv_cfg (dict): Dictionary to construct and config
conv layer.
kernel_size (int): Size of convolution kernel.
add_identity (bool): Whether to add identity in root.
init_cfg (dict, optional): Initialization config.
Default: None.
"""
def __init__(self,
in_channels,
out_channels,
norm_cfg,
conv_cfg,
kernel_size,
add_identity,
init_cfg=None):
super(Root, self).__init__(init_cfg)
self.conv = build_conv_layer(
conv_cfg,
in_channels,
out_channels,
1,
stride=1,
padding=(kernel_size - 1) // 2,
bias=False)
self.norm = dla_build_norm_layer(norm_cfg, out_channels)[1]
self.relu = nn.ReLU(inplace=True)
self.add_identity = add_identity
def forward(self, feat_list):
"""Forward function.
Args:
feat_list (list[torch.Tensor]): Output features from
multiple layers.
"""
children = feat_list
x = self.conv(torch.cat(feat_list, 1))
x = self.norm(x)
if self.add_identity:
x += children[0]
x = self.relu(x)
return x
class Tree(BaseModule):
"""Tree in DLANet.
Args:
levels (int): The level of the tree.
block (nn.Module): The block module in tree.
        in_channels (int): Input feature channel.
        out_channels (int): Output feature channel.
norm_cfg (dict): Dictionary to construct and config
norm layer.
conv_cfg (dict): Dictionary to construct and config
conv layer.
stride (int, optional): Convolution stride.
Default: 1.
        level_root (bool, optional): Whether the tree belongs to the
            root layer. Default: False.
root_dim (int, optional): Root input feature channel.
root_kernel_size (int, optional): Size of root
convolution kernel. Default: 1.
dilation (int, optional): Conv dilation. Default: 1.
add_identity (bool, optional): Whether to add
identity in root. Default: False.
init_cfg (dict, optional): Initialization config.
Default: None.
"""
def __init__(self,
levels,
block,
in_channels,
out_channels,
norm_cfg,
conv_cfg,
stride=1,
level_root=False,
root_dim=None,
root_kernel_size=1,
dilation=1,
add_identity=False,
init_cfg=None):
super(Tree, self).__init__(init_cfg)
if root_dim is None:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.root = Root(root_dim, out_channels, norm_cfg, conv_cfg,
root_kernel_size, add_identity)
self.tree1 = block(
in_channels,
out_channels,
norm_cfg,
conv_cfg,
stride,
dilation=dilation)
self.tree2 = block(
out_channels,
out_channels,
norm_cfg,
conv_cfg,
1,
dilation=dilation)
else:
self.tree1 = Tree(
levels - 1,
block,
in_channels,
out_channels,
norm_cfg,
conv_cfg,
stride,
root_dim=None,
root_kernel_size=root_kernel_size,
dilation=dilation,
add_identity=add_identity)
self.tree2 = Tree(
levels - 1,
block,
out_channels,
out_channels,
norm_cfg,
conv_cfg,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation,
add_identity=add_identity)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
build_conv_layer(
conv_cfg,
in_channels,
out_channels,
1,
stride=1,
bias=False),
dla_build_norm_layer(norm_cfg, out_channels)[1])
def forward(self, x, identity=None, children=None):
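        # Recursive evaluation: tree1 runs on the (possibly downsampled)
        # input; at the leaves (levels == 1) the Root module fuses
        # [tree2(x1), x1] + accumulated children, while inner nodes pass
        # x1 down as an extra child for the deeper Root.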
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
identity = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, identity)
if self.levels == 1:
x2 = self.tree2(x1)
feat_list = [x2, x1] + children
x = self.root(feat_list)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
@BACKBONES.register_module()
class DLANet(BaseModule):
r"""`DLA backbone <https://arxiv.org/abs/1707.06484>`_.
Args:
        depth (int): Depth of DLA. Currently only 34 is supported
            (see ``arch_settings``).
        in_channels (int, optional): Number of input image channels.
            Default: 3.
        out_indices (tuple[int], optional): Indices of the levels whose
            feature maps are returned. Default: (0, 1, 2, 3, 4, 5).
        frozen_stages (int, optional): Stages to be frozen (parameters
            stop updating and layers are set to eval mode). -1 means
            no stage is frozen. Default: -1.
        norm_cfg (dict, optional): Dictionary to construct and config
            norm layer. Default: None.
        conv_cfg (dict, optional): Dictionary to construct and config
            conv layer. Default: None.
        layer_with_level_root (list[bool], optional): Whether to apply
            level_root in each DLA layer; this is only used for
            Tree levels. Default: (False, True, True, True).
with_identity_root (bool, optional): Whether to add identity
in root layer. Default: False.
pretrained (str, optional): model pretrained path.
Default: None.
init_cfg (dict or list[dict], optional): Initialization
config dict. Default: None
"""
arch_settings = {
34: (BasicBlock, (1, 1, 1, 2, 2, 1), (16, 32, 64, 128, 256, 512)),
}
def __init__(self,
depth,
in_channels=3,
out_indices=(0, 1, 2, 3, 4, 5),
frozen_stages=-1,
norm_cfg=None,
conv_cfg=None,
layer_with_level_root=(False, True, True, True),
with_identity_root=False,
pretrained=None,
init_cfg=None):
super(DLANet, self).__init__(init_cfg)
if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for DLA')
        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be set at the same time'
if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
elif pretrained is None:
if init_cfg is None:
self.init_cfg = [
dict(type='Kaiming', layer='Conv2d'),
dict(
type='Constant',
val=1,
layer=['_BatchNorm', 'GroupNorm'])
]
block, levels, channels = self.arch_settings[depth]
self.channels = channels
self.num_levels = len(levels)
self.frozen_stages = frozen_stages
self.out_indices = out_indices
assert max(out_indices) < self.num_levels
self.base_layer = nn.Sequential(
build_conv_layer(
conv_cfg,
in_channels,
channels[0],
7,
stride=1,
padding=3,
bias=False),
dla_build_norm_layer(norm_cfg, channels[0])[1],
nn.ReLU(inplace=True))
# DLANet first uses two conv layers then uses several
# Tree layers
for i in range(2):
level_layer = self._make_conv_level(
channels[0],
channels[i],
levels[i],
norm_cfg,
conv_cfg,
stride=i + 1)
layer_name = f'level{i}'
self.add_module(layer_name, level_layer)
for i in range(2, self.num_levels):
dla_layer = Tree(
levels[i],
block,
channels[i - 1],
channels[i],
norm_cfg,
conv_cfg,
2,
level_root=layer_with_level_root[i - 2],
add_identity=with_identity_root)
layer_name = f'level{i}'
self.add_module(layer_name, dla_layer)
self._freeze_stages()
def _make_conv_level(self,
in_channels,
out_channels,
num_convs,
norm_cfg,
conv_cfg,
stride=1,
dilation=1):
"""Conv modules.
Args:
in_channels (int): Input feature channel.
out_channels (int): Output feature channel.
num_convs (int): Number of Conv module.
norm_cfg (dict): Dictionary to construct and config
norm layer.
conv_cfg (dict): Dictionary to construct and config
conv layer.
stride (int, optional): Conv stride. Default: 1.
dilation (int, optional): Conv dilation. Default: 1.
"""
modules = []
for i in range(num_convs):
modules.extend([
build_conv_layer(
conv_cfg,
in_channels,
out_channels,
3,
stride=stride if i == 0 else 1,
padding=dilation,
bias=False,
dilation=dilation),
dla_build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True)
])
in_channels = out_channels
return nn.Sequential(*modules)
def _freeze_stages(self):
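        # Freezing scheme: any frozen_stages >= 0 freezes the stem
        # (base_layer) and the two plain conv levels (level0, level1);
        # each further stage i in [1, frozen_stages] also freezes the
        # Tree layer 'level{i+1}'.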
if self.frozen_stages >= 0:
self.base_layer.eval()
for param in self.base_layer.parameters():
param.requires_grad = False
for i in range(2):
m = getattr(self, f'level{i}')
m.eval()
for param in m.parameters():
param.requires_grad = False
for i in range(1, self.frozen_stages + 1):
m = getattr(self, f'level{i+1}')
m.eval()
for param in m.parameters():
param.requires_grad = False
def forward(self, x):
outs = []
x = self.base_layer(x)
for i in range(self.num_levels):
            x = getattr(self, f'level{i}')(x)
if i in self.out_indices:
outs.append(x)
return tuple(outs)
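A hedged end-to-end sketch (norm_cfg must be given explicitly, since the default None cannot be copied by dla_build_norm_layer; the input size is illustrative):

```python
import torch

from mmdet3d.models.backbones import DLANet

model = DLANet(depth=34, norm_cfg=dict(type='BN'))
x = torch.rand(1, 3, 224, 224)
outs = model(x)
# One feature map per entry in out_indices; level5 is downsampled by
# 2**5, so the last map here is (1, 512, 7, 7).
for feat in outs:
    print(feat.shape)
```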
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import torch
import warnings
import torch
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule, auto_fp16
from torch import nn as nn
......