Refactor anchor generator and box coder

4040dbda · zhangwenwei · 148fea12 · 4040dbda · 4040dbda · 4040dbda
Commit 4040dbda authored Apr 27, 2020 by zhangwenwei
20 changed files
--- a/configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+++ b/configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
@@ -72,18 +72,21 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
+            strides=[2],
+            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=False),
        assigner_per_size=True,
-        anchor_strides=[2],
-        anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
        diff_rad_by_sin=True,
        assign_per_class=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
@@ -43,12 +43,15 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
-        anchor_strides=[2],
-        anchor_sizes=[[1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -116,7 +119,7 @@ train_pipeline = [
        loc_noise_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],

--- a/configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+++ b/configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
@@ -41,18 +41,21 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
-        anchor_strides=[2],
-        anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
@@ -41,12 +41,15 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
-        anchor_strides=[2],
-        anchor_sizes=[[1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -113,7 +116,7 @@ train_pipeline = [
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],

--- a/configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
+++ b/configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='FasterRCNN',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -22,11 +21,15 @@ model = dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
-        anchor_scales=[8],
-        anchor_ratios=[1 / 3, 0.5, 1.0, 2.0, 3.0],
-        anchor_strides=[4, 8, 16, 32, 64],
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -43,8 +46,10 @@ model = dict(
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
-            target_stds=[0.1, 0.1, 0.2, 0.2],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

--- a/configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
@@ -42,12 +42,15 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
-        anchor_strides=[2],
-        anchor_sizes=[[1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -113,7 +116,7 @@ train_pipeline = [
        loc_noise_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],

--- a/configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
@@ -41,12 +41,15 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
-        anchor_strides=[2],
-        anchor_sizes=[[1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -111,7 +114,7 @@ train_pipeline = [
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],

--- a/configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
+++ b/configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='FasterRCNN',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -22,11 +21,15 @@ model = dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
-        anchor_scales=[8],
-        anchor_ratios=[0.5, 1.0, 2.0],
-        anchor_strides=[4, 8, 16, 32, 64],
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -42,9 +45,11 @@ model = dict(
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
-            num_classes=80,
+            num_classes=10,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
-            target_stds=[0.1, 0.1, 0.2, 0.2],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
@@ -105,12 +110,14 @@ test_cfg = dict(
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
 )
 # dataset settings
-dataset_type = 'NuScenes2DDataset'
+dataset_type = 'CocoDataset'
 data_root = 'data/nuscenes/'
 # Values to be used for image normalization (BGR order)
 # Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
 # When using pre-trained models in Detectron1 or any MSRA models,
 # std has been absorbed into its conv1 weights, so the std needs to be set 1.
+classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+           'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 train_pipeline = [
@@ -147,14 +154,17 @@ data = dict(
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
 # optimizer

--- a/configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py
+++ b/configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py
@@ -51,18 +51,19 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
-            [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],  # car
-            [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],  # truck
-            [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],  # trailer
-            [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],  # bicycle
-            [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],  # pedestrian
-            [-49.6, -49.6, -1.80984986, 49.6, 49.6,
-             -1.80984986],  # traffic_cone
-            [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],  # barrier
+        anchor_generator=dict(
+            type='Anchor3DRangeGenerator',
+            ranges=[
+                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
+                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
+                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
+                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
+                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
+                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
+                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
            ],
-        anchor_strides=[2],
-        anchor_sizes=[
+            strides=[2],
+            sizes=[
                [1.95017717, 4.60718145, 1.72270761],  # car
                [2.4560939, 6.73778078, 2.73004906],  # truck
                [2.87427237, 12.01320693, 3.81509561],  # trailer
@@ -71,13 +72,14 @@ model = dict(
                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
                [2.49008838, 0.48578221, 0.98297065],  # barrier
            ],
-        anchor_custom_values=[0, 0],
-        anchor_rotations=[0, 1.57],
+            custom_values=[0, 0],
+            rotations=[0, 1.57],
+            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
+++ b/configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='RetinaNet',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -26,12 +25,16 @@ model = dict(
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
-        anchor_ratios=[0.5, 1.0, 2.0],
-        anchor_strides=[8, 16, 32, 64, 128],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[8, 16, 32, 64, 128]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/mmdet3d/apis/__init__.py
+++ b/mmdet3d/apis/__init__.py
-from .train import train_detector
+from .train import batch_processor, train_detector

-__all__ = [
-    'train_detector',
-]
+__all__ = ['batch_processor', 'train_detector']
--- a/mmdet3d/core/anchor/__init__.py
+++ b/mmdet3d/core/anchor/__init__.py
-from .anchor_3d_generator import (AlignedAnchorGeneratorRange,
-                                  AnchorGeneratorRange)
+from mmdet.core.anchor import build_anchor_generator
+from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,
+                                  Anchor3DRangeGenerator)

 __all__ = [
-    'AlignedAnchorGeneratorRange', 'AnchorGeneratorRange',
+    'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',
    'build_anchor_generator'
 ]
-
-
-def build_anchor_generator(cfg, **kwargs):
-    from . import anchor_3d_generator
-    import mmcv
-    if isinstance(cfg, dict):
-        return mmcv.runner.obj_from_dict(
-            cfg, anchor_3d_generator, default_args=kwargs)
-    else:
-        raise TypeError('Invalid type {} for building a sampler'.format(
-            type(cfg)))
--- a/mmdet3d/core/anchor/anchor_3d_generator.py
+++ b/mmdet3d/core/anchor/anchor_3d_generator.py
+import mmcv
 import torch

+from mmdet.core.anchor import ANCHOR_GENERATORS

-class AnchorGeneratorRange(object):
+
+@ANCHOR_GENERATORS.register_module
+class Anchor3DRangeGenerator(object):
+    """3D Anchor Generator by range
+
+    This anchor generator generates anchors by the given range in different
+    feature levels.
+    By convention in 3D detection, different anchor sizes are related to
+    different ranges for different categories. However, we find this setting
+    does not affect the performance much in some datasets, e.g., nuScenes.
+
+    Args:
+        ranges (list[list[float]]): Ranges of different anchors.
+            The ranges are the same across different feature levels. But may
+            vary for different anchor sizes if size_per_range is True.
+        sizes (list[list[float]]): 3D sizes of anchors.
+        strides (list[int]): Strides of anchors in different feature levels.
+        rotations (list(float)): Rotations of anchors in a feature grid.
+        custom_values (tuple(float)): Customized values of that anchor. For
+            example, in nuScenes the anchors have velocities.
+        reshape_out (bool): Whether to flatten the output into 2D (N, C).
+        size_per_range (bool): Whether to use separate ranges for different
+            sizes. If True, the ranges should have the same length as the
+            sizes; if not, a single range is duplicated to match.
+    """

    def __init__(self,
-                 anchor_ranges,
-                 sizes=((1.6, 3.9, 1.56), ),
-                 stride=2,
-                 rotations=(0, 3.1415926 / 2),
+                 ranges,
+                 sizes=[[1.6, 3.9, 1.56]],
+                 strides=[2],
+                 rotations=[0, 1.5707963],
                 custom_values=(),
-                 cache_anchor=False):
+                 reshape_out=True,
+                 size_per_range=True):
+        assert mmcv.is_list_of(ranges, list)
+        if size_per_range:
+            if len(sizes) != len(ranges):
+                assert len(ranges) == 1
+                ranges = ranges * len(sizes)
+            assert len(ranges) == len(sizes)
+        else:
+            assert len(ranges) == 1
+        assert mmcv.is_list_of(sizes, list)
+        assert isinstance(strides, list)
+
        self.sizes = sizes
-        self.stride = stride
-        self.anchor_ranges = anchor_ranges
-        if len(anchor_ranges) != len(sizes):
-            self.anchor_ranges = anchor_ranges * len(sizes)
+        self.strides = strides
+        self.ranges = ranges
        self.rotations = rotations
        self.custom_values = custom_values
-        self.cache_anchor = cache_anchor
        self.cached_anchors = None
+        self.reshape_out = reshape_out
+        self.size_per_range = size_per_range

    def __repr__(self):
        s = self.__class__.__name__ + '('
-        s += 'anchor_range={}, '.format(self.anchor_ranges)
-        s += 'stride={}, '.format(self.stride)
-        s += 'sizes={}, '.format(self.sizes)
-        s += 'rotations={})'.format(self.rotations)
+        s += f'anchor_range={self.ranges},\n'
+        s += f'strides={self.strides},\n'
+        s += f'sizes={self.sizes},\n'
+        s += f'rotations={self.rotations},\n'
+        s += f'reshape_out={self.reshape_out},\n'
+        s += f'size_per_range={self.size_per_range})'
        return s

    @property
@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object):
        num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0)
        return num_rot * num_size

-    def grid_anchors(self, feature_map_size, device='cuda'):
+    @property
+    def num_levels(self):
+        return len(self.strides)
+
+    def grid_anchors(self, featmap_sizes, device='cuda'):
+        """Generate grid anchors in multiple feature levels
+
+        Args:
+            featmap_sizes (list[tuple]): List of feature map sizes in
+                multiple feature levels.
+            device (str): Device where the anchors will be put on.
+
+        Return:
+            list[torch.Tensor]: Anchors in multiple feature levels.
+                With reshape_out, each tensor is [N, C], where C is the last
+                anchor dimension, N = width * height * num_base_anchors,
+                width and height are the sizes of the feature level, and
+                num_base_anchors is the number of anchors for that level.
+        """
+        assert self.num_levels == len(featmap_sizes)
+        multi_level_anchors = []
+        for i in range(self.num_levels):
+            anchors = self.single_level_grid_anchors(
+                featmap_sizes[i], self.strides[i], device=device)
+            if self.reshape_out:
+                anchors = anchors.reshape(-1, anchors.size(-1))
+            multi_level_anchors.append(anchors)
+        return multi_level_anchors
+
+    def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
        # We reimplement the anchor generator using torch in cuda
        # torch: 0.6975 s for 1000 times
        # numpy: 4.3345 s for 1000 times
-        # which is ~5 times faster than numpy implementation
-        if (self.cache_anchor and self.cached_anchors):
-            return self.cached_anchors
-        if not isinstance(self.anchor_ranges[0], list):
+        # which is ~5 times faster than the numpy implementation
+        if not self.size_per_range:
            return self.anchors_single_range(
-                feature_map_size,
-                self.anchor_ranges,
+                featmap_size,
+                self.ranges[0],
+                stride,
                self.sizes,
                self.rotations,
                device=device)
-        assert len(self.sizes) == len(self.anchor_ranges)
+
        mr_anchors = []
-        for anchor_range, anchor_size in zip(self.anchor_ranges, self.sizes):
+        for anchor_range, anchor_size in zip(self.ranges, self.sizes):
            mr_anchors.append(
                self.anchors_single_range(
-                    feature_map_size,
+                    featmap_size,
                    anchor_range,
+                    stride,
                    anchor_size,
                    self.rotations,
                    device=device))
        mr_anchors = torch.cat(mr_anchors, dim=-3)
-        if self.cache_anchor and not self.cached_anchors:
-            self.cached_anchors = mr_anchors
        return mr_anchors

    def anchors_single_range(self,
                             feature_size,
                             anchor_range,
-                             sizes=((1.6, 3.9, 1.56), ),
-                             rotations=(0, 3.1415927 / 2),
+                             stride=1,
+                             sizes=[[1.6, 3.9, 1.56]],
+                             rotations=[0, 1.5707963],
                             device='cuda'):
        """Generate anchors in a single range
        Args:
@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object):

        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
        # [1, 200, 176, N, 2, 7] for kitti after permute
-        # ret = ret.reshape(-1, 7)

        if len(self.custom_values) > 0:
            custom_ndim = len(self.custom_values)
@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object):
        return ret


-class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
+@ANCHOR_GENERATORS.register_module
+class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
+    """Aligned 3D Anchor Generator by range
+
+    This anchor generator uses a different manner to generate the positions
+    of anchors' centers from `Anchor3DRangeGenerator`.
+
+    Note:
+    The `align` means that the anchor's center is aligned with the voxel grid,
+    which is also the feature grid. The previous implementation of
+    `Anchor3DRangeGenerator` does not generate the anchors' centers according
+    to the voxel grid. Rather, it generates the centers by uniformly
+    distributing the anchors inside the minimum and maximum anchor ranges
+    according to the feature map sizes.
+    However, this means the anchors' centers do not match the feature grid.
+    The AlignedAnchor3DRangeGenerator adds 1 to the feature map sizes to
+    obtain the corners of the voxel grid. Then it either shifts the coordinates
+    to the voxel centers or uses the upper-left corners to distribute anchors.
+
+    Args:
+        align_corner (bool): Whether to align with the corner of the voxel
+            grid. By default it is False and the anchor's center will be
+            the same as the corresponding voxel's center, which is also the
+            center of the corresponding feature grid.
+    """

-    def __init__(self, shift_center=True, **kwargs):
-        super(AlignedAnchorGeneratorRange, self).__init__(**kwargs)
-        self.shift_center = shift_center
+    def __init__(self, align_corner=False, **kwargs):
+        super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)
+        self.align_corner = align_corner

    def anchors_single_range(self,
                             feature_size,
                             anchor_range,
-                             sizes=((1.6, 3.9, 1.56), ),
-                             rotations=(0, 3.1415927 / 2),
+                             stride,
+                             sizes=[[1.6, 3.9, 1.56]],
+                             rotations=[0, 1.5707963],
                             device='cuda'):
        """Generate anchors in a single range
        Args:
@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
            anchor_range[3],
            feature_size[2] + 1,
            device=device)
-        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * self.stride
+        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * stride
        rotations = torch.tensor(rotations, device=device)

        # shift the anchor center
-        if self.shift_center:
+        if not self.align_corner:
            z_shift = (z_centers[1] - z_centers[0]) / 2
            y_shift = (y_centers[1] - y_centers[0]) / 2
            x_shift = (x_centers[1] - x_centers[0]) / 2
@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):

        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
        # [1, 200, 176, N, 2, 7] for kitti after permute
-        # ret = ret.reshape(-1, 7)

        if len(self.custom_values) > 0:
            custom_ndim = len(self.custom_values)

--- a/mmdet3d/core/bbox/__init__.py
+++ b/mmdet3d/core/bbox/__init__.py
 from . import box_torch_ops
 from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
-from .coders import Residual3DBoxCoder
+from .coders import DeltaXYZWLHRBBoxCoder
 # from .bbox_target import bbox_target
 from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
                              bbox_overlaps_3d, bbox_overlaps_nearest_3d)
@@ -18,7 +18,7 @@ __all__ = [
    'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
    'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
    'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
-    'build_bbox_coder', 'Residual3DBoxCoder', 'boxes3d_to_bev_torch_lidar',
+    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
    'bbox_overlaps_3d'
 ]
--- a/mmdet3d/core/bbox/assign_sampling.py
+++ b/mmdet3d/core/bbox/assign_sampling.py
@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs):


 def build_bbox_coder(cfg, **kwargs):
-    if isinstance(cfg, coders.Residual3DBoxCoder):
+    if isinstance(cfg, coders.DeltaXYZWLHRBBoxCoder):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, coders, default_args=kwargs)

--- a/mmdet3d/core/bbox/coders/__init__.py
+++ b/mmdet3d/core/bbox/coders/__init__.py
-from .box_coder import Residual3DBoxCoder
+from mmdet.core.bbox import build_bbox_coder
+from .delta_xywh_bbox_coder import DeltaXYZWLHRBBoxCoder

-__all__ = ['Residual3DBoxCoder']
+__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
--- a/mmdet3d/core/bbox/coders/box_coder.py
+++ b/mmdet3d/core/bbox/coders/box_coder.py
-import numpy as np
 import torch

+from mmdet.core.bbox import BaseBBoxCoder
+from mmdet.core.bbox.builder import BBOX_CODERS

-class Residual3DBoxCoder(object):

-    def __init__(self, code_size=7, mean=None, std=None):
-        super().__init__()
-        self.code_size = code_size
-        self.mean = mean
-        self.std = std
-
-    @staticmethod
-    def encode_np(boxes, anchors):
-        """
-        :param boxes: (N, 7) x, y, z, w, l, h, r
-        :param anchors: (N, 7)
-        :return:
-        """
-        # need to convert boxes to z-center format
-        xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
-        xg, yg, zg, wg, lg, hg, rg = np.split(boxes, 7, axis=-1)
-        zg = zg + hg / 2
-        za = za + ha / 2
-        diagonal = np.sqrt(la**2 + wa**2)  # 4.3
-        xt = (xg - xa) / diagonal
-        yt = (yg - ya) / diagonal
-        zt = (zg - za) / ha  # 1.6
-        lt = np.log(lg / la)
-        wt = np.log(wg / wa)
-        ht = np.log(hg / ha)
-        rt = rg - ra
-        return np.concatenate([xt, yt, zt, wt, lt, ht, rt], axis=-1)
-
-    @staticmethod
-    def decode_np(box_encodings, anchors):
-        """
-        :param box_encodings: (N, 7) x, y, z, w, l, h, r
-        :param anchors: (N, 7)
-        :return:
-        """
-        # need to convert box_encodings to z-bottom format
-        xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
-        xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, 7, axis=-1)
+@BBOX_CODERS.register_module
+class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder):

-        za = za + ha / 2
-        diagonal = np.sqrt(la**2 + wa**2)
-        xg = xt * diagonal + xa
-        yg = yt * diagonal + ya
-        zg = zt * ha + za
-
-        lg = np.exp(lt) * la
-        wg = np.exp(wt) * wa
-        hg = np.exp(ht) * ha
-        rg = rt + ra
-        zg = zg - hg / 2
-        return np.concatenate([xg, yg, zg, wg, lg, hg, rg], axis=-1)
+    def __init__(self, code_size=7):
+        super(DeltaXYZWLHRBBoxCoder, self).__init__()
+        self.code_size = code_size

    @staticmethod
-    def encode_torch(anchors, boxes, means, stds):
+    def encode(anchors, boxes):
        """
        :param boxes: (N, 7+n) x, y, z, w, l, h, r, velo*
        :param anchors: (N, 7+n)
@@ -85,7 +40,7 @@ class Residual3DBoxCoder(object):
        return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)

    @staticmethod
-    def decode_torch(anchors, box_encodings, means, stds):
+    def decode(anchors, box_encodings):
        """
        :param box_encodings: (N, 7 + n) x, y, z, w, l, h, r
        :param anchors: (N, 7)

--- a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+++ b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
 from mmdet3d.ops.iou3d import boxes_iou3d_gpu
 from mmdet.core.bbox import bbox_overlaps
-from mmdet.core.bbox.iou_calculators.registry import IOU_CALCULATORS
+from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
 from .. import box_torch_ops



--- a/mmdet3d/datasets/__init__.py
+++ b/mmdet3d/datasets/__init__.py
-from mmdet.datasets.registry import DATASETS
+from mmdet.datasets.builder import DATASETS
 from .builder import build_dataset
 from .dataset_wrappers import RepeatFactorDataset
 from .kitti2d_dataset import Kitti2DDataset
 from .kitti_dataset import KittiDataset
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
-from .nuscenes2d_dataset import NuScenes2DDataset
 from .nuscenes_dataset import NuScenesDataset
+from .pipelines import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
+                        ObjectSample, PointShuffle, PointsRangeFilter,
+                        RandomFlip3D)

 __all__ = [
    'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
    'build_dataloader', 'RepeatFactorDataset', 'DATASETS', 'build_dataset',
-    'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'NuScenes2DDataset'
+    'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
+    'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
+    'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D'
 ]
--- a/mmdet3d/datasets/builder.py
+++ b/mmdet3d/datasets/builder.py
 import copy

+from mmcv.utils import build_from_cfg
+
 from mmdet.datasets import DATASETS, ConcatDataset, RepeatDataset
-from mmdet.utils import build_from_cfg
 from .dataset_wrappers import RepeatFactorDataset