Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
e0d892c7
Commit
e0d892c7
authored
May 08, 2020
by
liyinhao
Browse files
Merge branch 'master_temp' into indoor_loading
# Conflicts: # tools/data_converter/sunrgbd_data_utils.py
parents
929ebfe8
f584b970
Changes
111
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
607 additions
and
192 deletions
+607
-192
.gitlab-ci.yml
.gitlab-ci.yml
+2
-2
.pre-commit-config.yaml
.pre-commit-config.yaml
+1
-1
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
...second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+12
-9
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+13
-10
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
.../kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+13
-10
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+14
-11
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
...rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
+14
-9
configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
.../kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
+217
-0
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+13
-10
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+13
-10
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
+21
-11
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
+29
-38
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
+11
-8
docs/GETTING_STARTED.md
docs/GETTING_STARTED.md
+3
-3
mmdet3d/apis/__init__.py
mmdet3d/apis/__init__.py
+2
-4
mmdet3d/apis/train.py
mmdet3d/apis/train.py
+93
-0
mmdet3d/core/anchor/__init__.py
mmdet3d/core/anchor/__init__.py
+4
-14
mmdet3d/core/anchor/anchor_3d_generator.py
mmdet3d/core/anchor/anchor_3d_generator.py
+129
-39
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+2
-2
mmdet3d/core/bbox/assign_sampling.py
mmdet3d/core/bbox/assign_sampling.py
+1
-1
No files found.
.gitlab-ci.yml
View file @
e0d892c7
variables
:
PYTORCH_IMAGE
:
registry.sensetime.com/eig-research/pytorch:
pytorch
1.3.1-cuda10.1-devel
PYTORCH_IMAGE
:
registry.sensetime.com/eig-research/pytorch:1.3.1-cuda10.1-
cudnn7-
devel
stages
:
-
linting
...
...
@@ -26,7 +26,7 @@ before_script:
script
:
-
echo "Start building..."
-
pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
-
pip install git+https://github.com/open-mmlab/mmdetection.git
@v2.0
-
pip install git+https://github.com/open-mmlab/mmdetection.git
-
python -c "import mmdet; print(mmdet.__version__)"
-
pip install -v -e .[all]
-
python -c "import mmdet3d; print(mmdet3d.__version__)"
...
...
.pre-commit-config.yaml
View file @
e0d892c7
...
...
@@ -12,7 +12,7 @@ repos:
hooks
:
-
id
:
isort
-
repo
:
https://github.com/pre-commit/mirrors-yapf
rev
:
v0.
29
.0
rev
:
v0.
30
.0
hooks
:
-
id
:
yapf
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
...
...
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
View file @
e0d892c7
...
...
@@ -72,18 +72,21 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
assigner_per_size
=
True
,
anchor_strides
=
[
2
],
anchor_sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
diff_rad_by_sin
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -43,12 +43,15 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -116,7 +119,7 @@ train_pipeline = [
loc_noise_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -125,7 +128,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -133,7 +136,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -174,13 +177,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
View file @
e0d892c7
...
...
@@ -41,18 +41,21 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -113,7 +116,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Fals
e
,
use_camera
=
Tru
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -41,12 +41,15 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -91,7 +94,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Fals
e
,
use_camera
=
Tru
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -122,7 +125,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -130,7 +133,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -170,13 +173,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -22,11 +21,15 @@ model = dict(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_scales
=
[
8
],
anchor_ratios
=
[
1
/
3
,
0.5
,
1.0
,
2.0
,
3.0
],
anchor_strides
=
[
4
,
8
,
16
,
32
,
64
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
...
...
@@ -43,8 +46,10 @@ model = dict(
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
...
...
configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
0 → 100644
View file @
e0d892c7
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'PartA2'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
,
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
num_filters
=
[
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
num_upsample_filters
=
[
256
,
256
]),
rpn_head
=
dict
(
type
=
'PartA2RPNHead'
,
class_name
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
],
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
],
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
9000
,
nms_post
=
512
,
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
)
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
max_per_img
=
100
,
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.7
,
score_thr
=
0
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
True
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
use_road_plane
=
False
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
),
)
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
0
,
0
,
0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.39269908
,
0.39269908
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
trans_normal_noise
=
[
0.2
,
0.2
,
0.2
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
]),
]
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
training
=
True
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
val
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
test
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'testing'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.003
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is change during training
weight_decay
=
0.001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cosine'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
)
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
,
port
=
29502
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -42,12 +42,15 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -122,7 +125,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -130,7 +133,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -175,13 +178,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -41,12 +41,15 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -111,7 +114,7 @@ train_pipeline = [
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -120,7 +123,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -128,7 +131,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -168,13 +171,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -22,11 +21,15 @@ model = dict(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_scales
=
[
8
],
anchor_ratios
=
[
0.5
,
1.0
,
2.0
],
anchor_strides
=
[
4
,
8
,
16
,
32
,
64
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
...
...
@@ -42,9 +45,11 @@ model = dict(
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
],
num_classes
=
10
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
...
...
@@ -105,12 +110,14 @@ test_cfg = dict(
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type
=
'
NuScenes2D
Dataset'
dataset_type
=
'
Coco
Dataset'
data_root
=
'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
classes
=
(
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
)
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
train_pipeline
=
[
...
...
@@ -147,14 +154,17 @@ data = dict(
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_train.coco.json'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_val.coco.json'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_val.coco.json'
,
pipeline
=
test_pipeline
))
# optimizer
...
...
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2
0e
_nus-3d.py
→
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2
x
_nus-3d.py
View file @
e0d892c7
...
...
@@ -51,33 +51,35 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
# car
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
# truck
[
-
49.6
,
-
49.6
,
-
1.68526504
,
49.6
,
49.6
,
-
1.68526504
],
# trailer
[
-
49.6
,
-
49.6
,
-
1.67339111
,
49.6
,
49.6
,
-
1.67339111
],
# bicycle
[
-
49.6
,
-
49.6
,
-
1.61785072
,
49.6
,
49.6
,
-
1.61785072
],
# pedestrian
[
-
49.6
,
-
49.6
,
-
1.80984986
,
49.6
,
49.6
,
-
1.80984986
],
# traffic_cone
[
-
49.6
,
-
49.6
,
-
1.763965
,
49.6
,
49.6
,
-
1.763965
],
# barrier
],
anchor_strides
=
[
2
],
anchor_sizes
=
[
[
1.95017717
,
4.60718145
,
1.72270761
],
# car
[
2.4560939
,
6.73778078
,
2.73004906
],
# truck
[
2.87427237
,
12.01320693
,
3.81509561
],
# trailer
[
0.60058911
,
1.68452161
,
1.27192197
],
# bicycle
[
0.66344886
,
0.7256437
,
1.75748069
],
# pedestrian
[
0.39694519
,
0.40359262
,
1.06232151
],
# traffic_cone
[
2.49008838
,
0.48578221
,
0.98297065
],
# barrier
],
anchor_custom_values
=
[
0
,
0
],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
[
-
49.6
,
-
49.6
,
-
1.68526504
,
49.6
,
49.6
,
-
1.68526504
],
[
-
49.6
,
-
49.6
,
-
1.67339111
,
49.6
,
49.6
,
-
1.67339111
],
[
-
49.6
,
-
49.6
,
-
1.61785072
,
49.6
,
49.6
,
-
1.61785072
],
[
-
49.6
,
-
49.6
,
-
1.80984986
,
49.6
,
49.6
,
-
1.80984986
],
[
-
49.6
,
-
49.6
,
-
1.763965
,
49.6
,
49.6
,
-
1.763965
],
],
strides
=
[
2
],
sizes
=
[
[
1.95017717
,
4.60718145
,
1.72270761
],
# car
[
2.4560939
,
6.73778078
,
2.73004906
],
# truck
[
2.87427237
,
12.01320693
,
3.81509561
],
# trailer
[
0.60058911
,
1.68452161
,
1.27192197
],
# bicycle
[
0.66344886
,
0.7256437
,
1.75748069
],
# pedestrian
[
0.39694519
,
0.40359262
,
1.06232151
],
# traffic_cone
[
2.49008838
,
0.48578221
,
0.98297065
],
# barrier
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -128,7 +130,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Tru
e
,
use_camera
=
Fals
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
@@ -154,23 +156,12 @@ train_pipeline = [
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'Resize'
,
img_scale
=
[
(
1280
,
720
),
],
multiscale_mode
=
'value'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
...
...
@@ -214,7 +205,7 @@ lr_config = dict(
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
1000
,
step
=
[
16
,
19
])
step
=
[
20
,
23
])
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
...
...
@@ -227,10 +218,10 @@ log_config = dict(
])
# yapf:enable
# runtime settings
total_epochs
=
2
0
total_epochs
=
2
4
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/
pp_secfpn_80e
'
work_dir
=
'./work_dirs/
hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d
'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'RetinaNet'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -26,12 +25,16 @@ model = dict(
in_channels
=
256
,
stacked_convs
=
4
,
feat_channels
=
256
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
anchor_ratios
=
[
0.5
,
1.0
,
2.0
],
anchor_strides
=
[
8
,
16
,
32
,
64
,
128
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
8
,
16
,
32
,
64
,
128
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
docs/GETTING_STARTED.md
View file @
e0d892c7
...
...
@@ -374,7 +374,7 @@ from .coco import CocoDataset
from .registry import DATASETS
@DATASETS.register_module
@DATASETS.register_module
()
class MyDataset(CocoDataset):
CLASSES = ('a', 'b', 'c', 'd', 'e')
...
...
@@ -444,7 +444,7 @@ from .registry import OPTIMIZERS
from torch.optim import Optimizer
@OPTIMIZERS.register_module
@OPTIMIZERS.register_module
()
class MyOptimizer(Optimizer):
```
...
...
@@ -476,7 +476,7 @@ import torch.nn as nn
from ..registry import BACKBONES
@BACKBONES.register_module
@BACKBONES.register_module
()
class MobileNet(nn.Module):
def __init__(self, arg1, arg2):
...
...
mmdet3d/apis/__init__.py
View file @
e0d892c7
from
.train
import
train_detector
from
.train
import
batch_processor
,
train_detector
__all__
=
[
'train_detector'
,
]
__all__
=
[
'batch_processor'
,
'train_detector'
]
mmdet3d/apis/train.py
View file @
e0d892c7
import
torch
from
mmcv.parallel
import
MMDataParallel
,
MMDistributedDataParallel
from
mmcv.runner
import
DistSamplerSeedHook
,
Runner
from
mmdet3d.utils
import
get_root_logger
from
mmdet.apis.train
import
parse_losses
from
mmdet.core
import
(
DistEvalHook
,
DistOptimizerHook
,
EvalHook
,
Fp16OptimizerHook
,
build_optimizer
)
from
mmdet.datasets
import
build_dataloader
,
build_dataset
def
batch_processor
(
model
,
data
,
train_mode
):
...
...
@@ -27,3 +35,88 @@ def batch_processor(model, data, train_mode):
outputs
=
dict
(
loss
=
loss
,
log_vars
=
log_vars
,
num_samples
=
num_samples
)
return
outputs
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Launch detector training with the mmcv ``Runner``.

    Args:
        model (nn.Module): The detector to train.
        dataset (Dataset | list[Dataset]): Training dataset(s). A single
            dataset is wrapped into a list; one dataloader is built per
            entry to match ``cfg.workflow``.
        cfg (Config): Full training config (data, optimizer, hooks, ...).
        distributed (bool): Whether to wrap the model in
            ``MMDistributedDataParallel`` instead of ``MMDataParallel``.
        validate (bool): Whether to register an eval hook that runs on
            ``cfg.data.val`` during training.
        timestamp (str | None): Timestamp used to make the ``.log`` and
            ``.log.json`` filenames match.
        meta (dict | None): Extra meta info forwarded to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        cfg.work_dir,
        logger=logger,
        meta=meta)
    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting: an explicit `fp16` config takes precedence over the
    # plain (dist) optimizer hook built from cfg.optimizer_config.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        # re-seed the distributed sampler each epoch so shuffling differs
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # resuming a run takes precedence over loading initial weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
mmdet3d/core/anchor/__init__.py
View file @
e0d892c7
from
.anchor_3d_generator
import
(
AlignedAnchorGeneratorRange
,
AnchorGeneratorRange
)
from
mmdet.core.anchor
import
build_anchor_generator
from
.anchor_3d_generator
import
(
AlignedAnchor3DRangeGenerator
,
Anchor3DRangeGenerator
)
__all__
=
[
'AlignedAnchorGenerator
Range
'
,
'AnchorGenerator
Range
'
,
'AlignedAnchor
3DRange
Generator'
,
'Anchor
3DRange
Generator'
,
'build_anchor_generator'
]
def build_anchor_generator(cfg, **kwargs):
    """Build a 3D anchor generator from a config dict.

    Args:
        cfg (dict): Config whose ``type`` key names a class defined in
            ``anchor_3d_generator``; the remaining keys are passed to the
            class constructor.
        **kwargs: Default arguments merged into the constructor call.

    Returns:
        object: The instantiated anchor generator.

    Raises:
        TypeError: If ``cfg`` is not a dict.
    """
    from . import anchor_3d_generator
    import mmcv
    if isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, anchor_3d_generator, default_args=kwargs)
    else:
        # The previous message said "sampler", a copy-paste leftover that
        # was misleading for a function that builds anchor generators.
        raise TypeError(
            'Invalid type {} for building an anchor generator'.format(
                type(cfg)))
mmdet3d/core/anchor/anchor_3d_generator.py
View file @
e0d892c7
import
mmcv
import
torch
from
mmdet.core.anchor
import
ANCHOR_GENERATORS
class
AnchorGeneratorRange
(
object
):
@
ANCHOR_GENERATORS
.
register_module
()
class
Anchor3DRangeGenerator
(
object
):
"""3D Anchor Generator by range
This anchor generator generates anchors by the given range in different
feature levels.
Due to the convention in 3D detection, different anchor sizes are related to
different ranges for different categories. However, we find this setting
does not affect the performance much in some datasets, e.g., nuScenes.
Args:
ranges (list[list[float]]): Ranges of different anchors.
The ranges are the same across different feature levels. But may
vary for different anchor sizes if size_per_range is True.
sizes (list[list[float]]): 3D sizes of anchors.
strides (list[int]): Strides of anchors in different feature levels.
rotations (list(float)): Rotations of anchors in a feature grid.
custom_values (tuple(float)): Customized values of that anchor. For
example, in nuScenes the anchors have velocities.
reshape_out (bool): Whether to reshape the output into (Nx4)
size_per_range: Whether to use separate ranges for different sizes.
If size_per_range is True, the ranges should have the same length
as the sizes, if not, it will be duplicated.
"""
def __init__(self,
             ranges,
             sizes=[[1.6, 3.9, 1.56]],
             strides=[2],
             rotations=[0, 1.5707963],
             custom_values=(),
             reshape_out=True,
             size_per_range=True):
    """Initialize the range-based 3D anchor generator.

    Args:
        ranges (list[list[float]]): Ranges of different anchors; the same
            across feature levels, but may vary per anchor size when
            ``size_per_range`` is True.
        sizes (list[list[float]]): 3D sizes of anchors.
        strides (list[int]): Strides of anchors in different feature
            levels.
        rotations (list[float]): Rotations of anchors in a feature grid.
        custom_values (tuple[float]): Customized values appended to each
            anchor, e.g. velocities in nuScenes.
        reshape_out (bool): Whether to flatten the output anchors.
        size_per_range (bool): Whether to use a separate range per size.
    """
    # NOTE(review): the list defaults are shared across calls; they are
    # only rebound (never mutated) here, but confirm other methods do not
    # mutate ``self.sizes`` / ``self.rotations`` in place.
    assert mmcv.is_list_of(ranges, list)
    if size_per_range:
        # Duplicate a single range so len(ranges) == len(sizes) holds.
        if len(sizes) != len(ranges):
            assert len(ranges) == 1
            ranges = ranges * len(sizes)
        assert len(ranges) == len(sizes)
    else:
        # One shared range for all sizes.
        assert len(ranges) == 1
    assert mmcv.is_list_of(sizes, list)
    assert isinstance(strides, list)

    self.sizes = sizes
    self.strides = strides  # one stride per feature level
    self.ranges = ranges
    self.rotations = rotations
    self.custom_values = custom_values
    self.cached_anchors = None
    self.reshape_out = reshape_out
    self.size_per_range = size_per_range
def __repr__(self):
    """Return a multi-line summary of the generator configuration."""
    s = self.__class__.__name__ + '('
    s += f'anchor_range={self.ranges},\n'
    s += f'strides={self.strides},\n'
    s += f'sizes={self.sizes},\n'
    s += f'rotations={self.rotations},\n'
    s += f'reshape_out={self.reshape_out},\n'
    s += f'size_per_range={self.size_per_range})'
    return s
@
property
...
...
@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object):
num_size
=
torch
.
tensor
(
self
.
sizes
).
reshape
(
-
1
,
3
).
size
(
0
)
return
num_rot
*
num_size
def
grid_anchors
(
self
,
feature_map_size
,
device
=
'cuda'
):
@property
def num_levels(self):
    # The number of feature levels equals the number of configured strides.
    return len(self.strides)
def grid_anchors(self, featmap_sizes, device='cuda'):
    """Generate grid anchors in multiple feature levels.

    Args:
        featmap_sizes (list[tuple]): List of feature map sizes in
            multiple feature levels.
        device (str): Device where the anchors will be put on.

    Returns:
        list[torch.Tensor]: Anchors in multiple feature levels.
            When ``self.reshape_out`` is True, each tensor is reshaped to
            [N, anchor_dim], where N = width * height * num_base_anchors;
            width and height are the sizes of the corresponding feature
            level, and num_base_anchors is the number of anchors for that
            level.
    """
    assert self.num_levels == len(featmap_sizes)
    multi_level_anchors = []
    for i in range(self.num_levels):
        anchors = self.single_level_grid_anchors(
            featmap_sizes[i], self.strides[i], device=device)
        if self.reshape_out:
            # flatten the grid dims so each row is one anchor
            anchors = anchors.reshape(-1, anchors.size(-1))
        multi_level_anchors.append(anchors)
    return multi_level_anchors
def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
    """Generate grid anchors of a single feature level.

    Args:
        featmap_size (tuple): Size of the feature map of this level.
        stride (int): Anchor stride of this level.
        device (str): Device where the anchors will be put on.

    Returns:
        torch.Tensor: Anchors of this level. When ``size_per_range`` is
            True, the per-(range, size) anchor sets are concatenated along
            dim -3.
    """
    # We reimplement the anchor generator using torch in cuda
    # torch: 0.6975 s for 1000 times
    # numpy: 4.3345 s for 1000 times
    # which is ~5 times faster than the numpy implementation
    if not self.size_per_range:
        # A single shared range covers all sizes at once.
        return self.anchors_single_range(
            featmap_size,
            self.ranges[0],
            stride,
            self.sizes,
            self.rotations,
            device=device)

    # One anchor set per (range, size) pair, concatenated afterwards.
    mr_anchors = []
    for anchor_range, anchor_size in zip(self.ranges, self.sizes):
        mr_anchors.append(
            self.anchors_single_range(
                featmap_size,
                anchor_range,
                stride,
                anchor_size,
                self.rotations,
                device=device))
    mr_anchors = torch.cat(mr_anchors, dim=-3)
    return mr_anchors
def
anchors_single_range
(
self
,
feature_size
,
anchor_range
,
sizes
=
((
1.6
,
3.9
,
1.56
),
),
rotations
=
(
0
,
3.1415927
/
2
),
stride
=
1
,
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.5707963
],
device
=
'cuda'
):
"""Generate anchors in a single range
Args:
...
...
@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object):
ret
=
torch
.
cat
(
rets
,
dim
=-
1
).
permute
([
2
,
1
,
0
,
3
,
4
,
5
])
# [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if
len
(
self
.
custom_values
)
>
0
:
custom_ndim
=
len
(
self
.
custom_values
)
...
...
@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object):
return
ret
class
AlignedAnchorGeneratorRange
(
AnchorGeneratorRange
):
@
ANCHOR_GENERATORS
.
register_module
()
class
AlignedAnchor3DRangeGenerator
(
Anchor3DRangeGenerator
):
"""Aligned 3D Anchor Generator by range
This anchor generator uses a different manner to generate the positions
of anchors' centers from `Anchor3DRangeGenerator`.
Note:
The `align` means that the anchor's center is aligned with the voxel grid,
which is also the feature grid. The previous implementation of
`Anchor3DRangeGenerator` does not generate the anchors' centers according
to the voxel grid. Rather, it generates the centers by uniformly
distributing the anchors inside the minimum and maximum anchor ranges
according to the feature map sizes.
However, this makes the anchor centers not match the feature grid.
The AlignedAnchor3DRangeGenerator adds 1 to the feature map sizes
to obtain the corners of the voxel grid. It then either shifts the
coordinates to the centers of the voxel grid, or uses the left-up corners,
to distribute the anchors.
Args:
align_corner (bool): Whether to align with the corner of the voxel
grid. By default it is False and the anchor's center will be
the same as the corresponding voxel's center, which is also the
center of the corresponding feature grid.
"""
def __init__(self, align_corner=False, **kwargs):
    """Initialize the aligned range-based 3D anchor generator.

    Args:
        align_corner (bool): Whether to align the anchor with the corner
            of the voxel grid instead of the voxel center.
        **kwargs: Forwarded to ``Anchor3DRangeGenerator``.
    """
    super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)
    self.align_corner = align_corner
def
anchors_single_range
(
self
,
feature_size
,
anchor_range
,
sizes
=
((
1.6
,
3.9
,
1.56
),
),
rotations
=
(
0
,
3.1415927
/
2
),
stride
,
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.5707963
],
device
=
'cuda'
):
"""Generate anchors in a single range
Args:
...
...
@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
anchor_range
[
3
],
feature_size
[
2
]
+
1
,
device
=
device
)
sizes
=
torch
.
tensor
(
sizes
,
device
=
device
).
reshape
(
-
1
,
3
)
*
self
.
stride
sizes
=
torch
.
tensor
(
sizes
,
device
=
device
).
reshape
(
-
1
,
3
)
*
stride
rotations
=
torch
.
tensor
(
rotations
,
device
=
device
)
# shift the anchor center
if
self
.
shift_cent
er
:
if
not
self
.
align_corn
er
:
z_shift
=
(
z_centers
[
1
]
-
z_centers
[
0
])
/
2
y_shift
=
(
y_centers
[
1
]
-
y_centers
[
0
])
/
2
x_shift
=
(
x_centers
[
1
]
-
x_centers
[
0
])
/
2
...
...
@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
ret
=
torch
.
cat
(
rets
,
dim
=-
1
).
permute
([
2
,
1
,
0
,
3
,
4
,
5
])
# [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if
len
(
self
.
custom_values
)
>
0
:
custom_ndim
=
len
(
self
.
custom_values
)
...
...
mmdet3d/core/bbox/__init__.py
View file @
e0d892c7
from
.
import
box_torch_ops
from
.assigners
import
AssignResult
,
BaseAssigner
,
MaxIoUAssigner
from
.coders
import
Residual3D
BoxCoder
from
.coders
import
DeltaXYZWLHRB
BoxCoder
# from .bbox_target import bbox_target
from
.iou_calculators
import
(
BboxOverlaps3D
,
BboxOverlapsNearest3D
,
bbox_overlaps_3d
,
bbox_overlaps_nearest_3d
)
...
...
@@ -18,7 +18,7 @@ __all__ = [
'PseudoSampler'
,
'RandomSampler'
,
'InstanceBalancedPosSampler'
,
'IoUBalancedNegSampler'
,
'CombinedSampler'
,
'SamplingResult'
,
'build_assigner'
,
'build_sampler'
,
'assign_and_sample'
,
'box_torch_ops'
,
'build_bbox_coder'
,
'
Residual3D
BoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'build_bbox_coder'
,
'
DeltaXYZWLHRB
BoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
]
mmdet3d/core/bbox/assign_sampling.py
View file @
e0d892c7
...
...
@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs):
def
build_bbox_coder
(
cfg
,
**
kwargs
):
if
isinstance
(
cfg
,
coders
.
Residual3D
BoxCoder
):
if
isinstance
(
cfg
,
coders
.
DeltaXYZWLHRB
BoxCoder
):
return
cfg
elif
isinstance
(
cfg
,
dict
):
return
mmcv
.
runner
.
obj_from_dict
(
cfg
,
coders
,
default_args
=
kwargs
)
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment