Merge master

aec41c7f · zhangwenwei · 49f06039 · 4eca6606 · aec41c7f · aec41c7f
Commit aec41c7f authored Apr 27, 2020 by zhangwenwei
20 changed files
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -3,6 +3,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmdet,mmdet3d
-known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,pytest,shapely,six,skimage,torch,torchvision
+known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,torch,torchvision
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
--- a/configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+++ b/configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
@@ -72,18 +72,21 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
+        anchor_generator=dict(
-            [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+            type='Anchor3DRangeGenerator',
-            [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+            ranges=[
-            [0, -40.0, -1.78, 70.4, 40.0, -1.78],
+                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-        ],
+                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
+            ],
+            strides=[2],
+            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=False),
        assigner_per_size=True,
-        anchor_strides=[2],
-        anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-        anchor_rotations=[0, 1.57],
        diff_rad_by_sin=True,
        assign_per_class=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
@@ -43,12 +43,15 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
+        anchor_generator=dict(
-        anchor_strides=[2],
+            type='Anchor3DRangeGenerator',
-        anchor_sizes=[[1.6, 3.9, 1.56]],
+            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -116,7 +119,7 @@ train_pipeline = [
        loc_noise_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
@@ -125,7 +128,7 @@ train_pipeline = [
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
@@ -133,7 +136,7 @@ test_pipeline = [
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
 ]
 data = dict(
@@ -174,13 +177,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
 lr_config = dict(
    policy='cyclic',
-    target_ratio=[10, 1e-4],
+    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
 )
 momentum_config = dict(
    policy='cyclic',
-    target_ratio=[0.85 / 0.95, 1],
+    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
 )

--- a/configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+++ b/configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
@@ -41,18 +41,21 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
+        anchor_generator=dict(
-            [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+            type='Anchor3DRangeGenerator',
-            [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+            ranges=[
-            [0, -40.0, -1.78, 70.4, 40.0, -1.78],
+                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-        ],
+                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-        anchor_strides=[2],
+                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
-        anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
+            ],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
@@ -41,12 +41,15 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
+        anchor_generator=dict(
-        anchor_strides=[2],
+            type='Anchor3DRangeGenerator',
-        anchor_sizes=[[1.6, 3.9, 1.56]],
+            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -91,7 +94,7 @@ input_modality = dict(
    use_lidar=True,
    use_depth=False,
    use_lidar_intensity=True,
-    use_camera=False,
+    use_camera=True,
 )
 db_sampler = dict(
    root_path=data_root,
@@ -113,7 +116,7 @@ train_pipeline = [
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
@@ -122,7 +125,7 @@ train_pipeline = [
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
@@ -130,7 +133,7 @@ test_pipeline = [
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
 ]
 data = dict(
@@ -170,13 +173,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
 optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
 lr_config = dict(
    policy='cyclic',
-    target_ratio=[10, 1e-4],
+    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
 )
 momentum_config = dict(
    policy='cyclic',
-    target_ratio=[0.85 / 0.95, 1],
+    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
 )

--- a/configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
+++ b/configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='FasterRCNN',
-    pretrained=('./pretrain_detectron/'
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -22,11 +21,15 @@ model = dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
-        anchor_scales=[8],
+        anchor_generator=dict(
-        anchor_ratios=[1 / 3, 0.5, 1.0, 2.0, 3.0],
+            type='AnchorGenerator',
-        anchor_strides=[4, 8, 16, 32, 64],
+            scales=[8],
-        target_means=[.0, .0, .0, .0],
+            ratios=[0.5, 1.0, 2.0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -43,8 +46,10 @@ model = dict(
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
-            target_means=[0., 0., 0., 0.],
+            bbox_coder=dict(
-            target_stds=[0.1, 0.1, 0.2, 0.2],
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

--- a/configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
@@ -42,12 +42,15 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
+        anchor_generator=dict(
-        anchor_strides=[2],
+            type='Anchor3DRangeGenerator',
-        anchor_sizes=[[1.6, 3.9, 1.56]],
+            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -113,7 +116,7 @@ train_pipeline = [
        loc_noise_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
@@ -122,7 +125,7 @@ train_pipeline = [
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
@@ -130,7 +133,7 @@ test_pipeline = [
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
 ]
 data = dict(
@@ -175,13 +178,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
 lr_config = dict(
    policy='cyclic',
-    target_ratio=[10, 1e-4],
+    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
 )
 momentum_config = dict(
    policy='cyclic',
-    target_ratio=[0.85 / 0.95, 1],
+    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
 )

--- a/configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
@@ -41,12 +41,15 @@ model = dict(
        feat_channels=512,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
+        anchor_generator=dict(
-        anchor_strides=[2],
+            type='Anchor3DRangeGenerator',
-        anchor_sizes=[[1.6, 3.9, 1.56]],
+            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
-        anchor_rotations=[0, 1.57],
+            strides=[2],
+            sizes=[[1.6, 3.9, 1.56]],
+            rotations=[0, 1.57],
+            reshape_out=True),
        diff_rad_by_sin=True,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
@@ -111,7 +114,7 @@ train_pipeline = [
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='PointsRandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
@@ -120,7 +123,7 @@ train_pipeline = [
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
@@ -128,7 +131,7 @@ test_pipeline = [
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
 ]
 data = dict(
@@ -168,13 +171,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
 optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
 lr_config = dict(
    policy='cyclic',
-    target_ratio=[10, 1e-4],
+    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
 )
 momentum_config = dict(
    policy='cyclic',
-    target_ratio=[0.85 / 0.95, 1],
+    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
 )

--- a/configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
+++ b/configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='FasterRCNN',
-    pretrained=('./pretrain_detectron/'
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -22,11 +21,15 @@ model = dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
-        anchor_scales=[8],
+        anchor_generator=dict(
-        anchor_ratios=[0.5, 1.0, 2.0],
+            type='AnchorGenerator',
-        anchor_strides=[4, 8, 16, 32, 64],
+            scales=[8],
-        target_means=[.0, .0, .0, .0],
+            ratios=[0.5, 1.0, 2.0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -42,9 +45,11 @@ model = dict(
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
-            num_classes=80,
+            num_classes=10,
-            target_means=[0., 0., 0., 0.],
+            bbox_coder=dict(
-            target_stds=[0.1, 0.1, 0.2, 0.2],
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
@@ -105,12 +110,14 @@ test_cfg = dict(
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
 )
 # dataset settings
-dataset_type = 'NuScenes2DDataset'
+dataset_type = 'CocoDataset'
 data_root = 'data/nuscenes/'
 # Values to be used for image normalization (BGR order)
 # Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
 # When using pre-trained models in Detectron1 or any MSRA models,
 # std has been absorbed into its conv1 weights, so the std needs to be set 1.
+classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+           'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 train_pipeline = [
@@ -147,14 +154,17 @@ data = dict(
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
+        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
 # optimizer

--- a/configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py
+++ b/configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py
@@ -51,33 +51,35 @@ model = dict(
        feat_channels=384,
        use_direction_classifier=True,
        encode_bg_as_zeros=True,
-        anchor_range=[
+        anchor_generator=dict(
-            [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],  # car
+            type='Anchor3DRangeGenerator',
-            [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],  # truck
+            ranges=[
-            [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],  # trailer
+                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
-            [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],  # bicycle
+                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
-            [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],  # pedestrian
+                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
-            [-49.6, -49.6, -1.80984986, 49.6, 49.6,
+                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
-             -1.80984986],  # traffic_cone
+                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
-            [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],  # barrier
+                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
-        ],
+                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
-        anchor_strides=[2],
+            ],
-        anchor_sizes=[
+            strides=[2],
-            [1.95017717, 4.60718145, 1.72270761],  # car
+            sizes=[
-            [2.4560939, 6.73778078, 2.73004906],  # truck
+                [1.95017717, 4.60718145, 1.72270761],  # car
-            [2.87427237, 12.01320693, 3.81509561],  # trailer
+                [2.4560939, 6.73778078, 2.73004906],  # truck
-            [0.60058911, 1.68452161, 1.27192197],  # bicycle
+                [2.87427237, 12.01320693, 3.81509561],  # trailer
-            [0.66344886, 0.7256437, 1.75748069],  # pedestrian
+                [0.60058911, 1.68452161, 1.27192197],  # bicycle
-            [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
+                [0.66344886, 0.7256437, 1.75748069],  # pedestrian
-            [2.49008838, 0.48578221, 0.98297065],  # barrier
+                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
-        ],
+                [2.49008838, 0.48578221, 0.98297065],  # barrier
-        anchor_custom_values=[0, 0],
+            ],
-        anchor_rotations=[0, 1.57],
+            custom_values=[0, 0],
+            rotations=[0, 1.57],
+            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
-        bbox_coder=dict(type='Residual3DBoxCoder', ),
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
+++ b/configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
@@ -2,8 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
    type='RetinaNet',
-    pretrained=('./pretrain_detectron/'
+    pretrained=('open-mmlab://resnet50_caffe_bgr'),
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -26,12 +25,16 @@ model = dict(
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
-        octave_base_scale=4,
+        anchor_generator=dict(
-        scales_per_octave=3,
+            type='AnchorGenerator',
-        anchor_ratios=[0.5, 1.0, 2.0],
+            octave_base_scale=4,
-        anchor_strides=[8, 16, 32, 64, 128],
+            scales_per_octave=3,
-        target_means=[.0, .0, .0, .0],
+            ratios=[0.5, 1.0, 2.0],
-        target_stds=[1.0, 1.0, 1.0, 1.0],
+            strides=[8, 16, 32, 64, 128]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,

--- a/mmdet3d/apis/__init__.py
+++ b/mmdet3d/apis/__init__.py
-from .train import train_detector
+from .train import batch_processor, train_detector
-__all__ = [
+__all__ = ['batch_processor', 'train_detector']
-    'train_detector',
-]
--- a/mmdet3d/apis/train.py
+++ b/mmdet3d/apis/train.py
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import DistSamplerSeedHook, Runner
 from mmdet.apis.train import parse_losses
+from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
+                        Fp16OptimizerHook, build_optimizer)
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.utils import get_root_logger
 def batch_processor(model, data, train_mode):
@@ -27,3 +35,87 @@ def batch_processor(model, data, train_mode):
    outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
    return outputs
+def train_detector(model,
+                   dataset,
+                   cfg,
+                   distributed=False,
+                   validate=False,
+                   timestamp=None,
+                   meta=None):
+    logger = get_root_logger(cfg.log_level)
+    # prepare data loaders
+    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+    data_loaders = [
+        build_dataloader(
+            ds,
+            cfg.data.samples_per_gpu,
+            cfg.data.workers_per_gpu,
+            # cfg.gpu_ids will be ignored if distributed
+            len(cfg.gpu_ids),
+            dist=distributed,
+            seed=cfg.seed) for ds in dataset
+    ]
+    # put model on gpus
+    if distributed:
+        find_unused_parameters = cfg.get('find_unused_parameters', False)
+        # Sets the `find_unused_parameters` parameter in
+        # torch.nn.parallel.DistributedDataParallel
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False,
+            find_unused_parameters=find_unused_parameters)
+    else:
+        model = MMDataParallel(
+            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
+    # build runner
+    optimizer = build_optimizer(model, cfg.optimizer)
+    runner = Runner(
+        model,
+        batch_processor,
+        optimizer,
+        cfg.work_dir,
+        logger=logger,
+        meta=meta)
+    # an ugly workaround to make the .log and .log.json filenames the same
+    runner.timestamp = timestamp
+    # fp16 setting
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        optimizer_config = Fp16OptimizerHook(
+            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
+    elif distributed:
+        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
+    else:
+        optimizer_config = cfg.optimizer_config
+    # register hooks
+    runner.register_training_hooks(cfg.lr_config, optimizer_config,
+                                   cfg.checkpoint_config, cfg.log_config)
+    if distributed:
+        runner.register_hook(DistSamplerSeedHook())
+    # register eval hooks
+    if validate:
+        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+        val_dataloader = build_dataloader(
+            val_dataset,
+            samples_per_gpu=1,
+            workers_per_gpu=cfg.data.workers_per_gpu,
+            dist=distributed,
+            shuffle=False)
+        eval_cfg = cfg.get('evaluation', {})
+        eval_hook = DistEvalHook if distributed else EvalHook
+        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
--- a/mmdet3d/core/anchor/__init__.py
+++ b/mmdet3d/core/anchor/__init__.py
-from .anchor_3d_generator import (AlignedAnchorGeneratorRange,
+from mmdet.core.anchor import build_anchor_generator
-                                  AnchorGeneratorRange)
+from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,
+                                  Anchor3DRangeGenerator)
 __all__ = [
-    'AlignedAnchorGeneratorRange', 'AnchorGeneratorRange',
+    'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',
    'build_anchor_generator'
 ]
-def build_anchor_generator(cfg, **kwargs):
-    from . import anchor_3d_generator
-    import mmcv
-    if isinstance(cfg, dict):
-        return mmcv.runner.obj_from_dict(
-            cfg, anchor_3d_generator, default_args=kwargs)
-    else:
-        raise TypeError('Invalid type {} for building a sampler'.format(
-            type(cfg)))
--- a/mmdet3d/core/anchor/anchor_3d_generator.py
+++ b/mmdet3d/core/anchor/anchor_3d_generator.py
+import mmcv
 import torch
+from mmdet.core.anchor import ANCHOR_GENERATORS
-class AnchorGeneratorRange(object):
+@ANCHOR_GENERATORS.register_module
+class Anchor3DRangeGenerator(object):
+    """3D Anchor Generator by range
+    This anchor generator generates anchors by the given range in different
+    feature levels.
+    Due to the convention in 3D detection, different anchor sizes are related
+    to different ranges for different categories. However, we find this setting
+    does not affect the performance much in some datasets, e.g., nuScenes.
+    Args:
+        ranges (list[list[float]]): Ranges of different anchors.
+            The ranges are the same across different feature levels. But may
+            vary for different anchor sizes if size_per_range is True.
+        sizes (list[list[float]]): 3D sizes of anchors.
+        strides (list[int]): Strides of anchors in different feature levels.
+        rotations (list[float]): Rotations of anchors in a feature grid.
+        custom_values (tuple[float]): Customized values of that anchor. For
+            example, in nuScenes the anchors have velocities.
+        reshape_out (bool): Whether to reshape the output into (Nx4)
+        size_per_range (bool): Whether to use separate ranges for different sizes.
+            If size_per_range is True, the ranges should have the same length
+            as the sizes, if not, it will be duplicated.
+    """
    def __init__(self,
-                 anchor_ranges,
+                 ranges,
-                 sizes=((1.6, 3.9, 1.56), ),
+                 sizes=[[1.6, 3.9, 1.56]],
-                 stride=2,
+                 strides=[2],
-                 rotations=(0, 3.1415926 / 2),
+                 rotations=[0, 1.5707963],
                 custom_values=(),
-                 cache_anchor=False):
+                 reshape_out=True,
+                 size_per_range=True):
+        assert mmcv.is_list_of(ranges, list)
+        if size_per_range:
+            if len(sizes) != len(ranges):
+                assert len(ranges) == 1
+                ranges = ranges * len(sizes)
+            assert len(ranges) == len(sizes)
+        else:
+            assert len(ranges) == 1
+        assert mmcv.is_list_of(sizes, list)
+        assert isinstance(strides, list)
        self.sizes = sizes
-        self.stride = stride
+        self.strides = strides
-        self.anchor_ranges = anchor_ranges
+        self.ranges = ranges
-        if len(anchor_ranges) != len(sizes):
-            self.anchor_ranges = anchor_ranges * len(sizes)
        self.rotations = rotations
        self.custom_values = custom_values
-        self.cache_anchor = cache_anchor
        self.cached_anchors = None
+        self.reshape_out = reshape_out
+        self.size_per_range = size_per_range
    def __repr__(self):
        s = self.__class__.__name__ + '('
-        s += 'anchor_range={}, '.format(self.anchor_ranges)
+        s += f'anchor_range={self.ranges},\n'
-        s += 'stride={}, '.format(self.stride)
+        s += f'strides={self.strides},\n'
-        s += 'sizes={}, '.format(self.sizes)
+        s += f'sizes={self.sizes},\n'
-        s += 'rotations={})'.format(self.rotations)
+        s += f'rotations={self.rotations},\n'
+        s += f'reshape_out={self.reshape_out},\n'
+        s += f'size_per_range={self.size_per_range})'
        return s
    @property
@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object):
        num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0)
        return num_rot * num_size
-    def grid_anchors(self, feature_map_size, device='cuda'):
+    @property
+    def num_levels(self):
+        return len(self.strides)
+    def grid_anchors(self, featmap_sizes, device='cuda'):
+        """Generate grid anchors in multiple feature levels
+        Args:
+            featmap_sizes (list[tuple]): List of feature map sizes in
+                multiple feature levels.
+            device (str): Device where the anchors will be put on.
+        Return:
+            list[torch.Tensor]: Anchors in multiple feature levels.
+                The sizes of each tensor should be [N, 7], where
+                N = width * height * num_base_anchors, width and height
+                are the sizes of the corresponding feature level,
+                num_base_anchors is the number of anchors for that level.
+        """
+        assert self.num_levels == len(featmap_sizes)
+        multi_level_anchors = []
+        for i in range(self.num_levels):
+            anchors = self.single_level_grid_anchors(
+                featmap_sizes[i], self.strides[i], device=device)
+            if self.reshape_out:
+                anchors = anchors.reshape(-1, anchors.size(-1))
+            multi_level_anchors.append(anchors)
+        return multi_level_anchors
+    def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
        # We reimplement the anchor generator using torch in cuda
        # torch: 0.6975 s for 1000 times
        # numpy: 4.3345 s for 1000 times
-        # which is ~5 times faster than numpy implementation
+        # which is ~5 times faster than the numpy implementation
-        if (self.cache_anchor and self.cached_anchors):
+        if not self.size_per_range:
-            return self.cached_anchors
-        if not isinstance(self.anchor_ranges[0], list):
            return self.anchors_single_range(
-                feature_map_size,
+                featmap_size,
-                self.anchor_ranges,
+                self.ranges[0],
+                stride,
                self.sizes,
                self.rotations,
                device=device)
-        assert len(self.sizes) == len(self.anchor_ranges)
        mr_anchors = []
-        for anchor_range, anchor_size in zip(self.anchor_ranges, self.sizes):
+        for anchor_range, anchor_size in zip(self.ranges, self.sizes):
            mr_anchors.append(
                self.anchors_single_range(
-                    feature_map_size,
+                    featmap_size,
                    anchor_range,
+                    stride,
                    anchor_size,
                    self.rotations,
                    device=device))
        mr_anchors = torch.cat(mr_anchors, dim=-3)
-        if self.cache_anchor and not self.cached_anchors:
-            self.cached_anchors = mr_anchors
        return mr_anchors
    def anchors_single_range(self,
                             feature_size,
                             anchor_range,
-                             sizes=((1.6, 3.9, 1.56), ),
+                             stride=1,
-                             rotations=(0, 3.1415927 / 2),
+                             sizes=[[1.6, 3.9, 1.56]],
+                             rotations=[0, 1.5707963],
                             device='cuda'):
        """Generate anchors in a single range
        Args:
@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object):
        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
        # [1, 200, 176, N, 2, 7] for kitti after permute
-        # ret = ret.reshape(-1, 7)
        if len(self.custom_values) > 0:
            custom_ndim = len(self.custom_values)
@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object):
        return ret
-class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
+@ANCHOR_GENERATORS.register_module
+class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
+    """Aligned 3D Anchor Generator by range
+    This anchor generator uses a different manner to generate the positions
+    of anchors' centers from `Anchor3DRangeGenerator`.
+    Note:
+    The `align` means that the anchor's center is aligned with the voxel grid,
+    which is also the feature grid. The previous implementation of
+    `Anchor3DRangeGenerator` does not generate the anchors' center according
+    to the voxel grid. Rather, it generates the center by uniformly
+    distributing the anchors inside the minimum and maximum anchor ranges
+    according to the feature map sizes.
+    However, this means the anchors' centers do not match the feature grid.
+    The AlignedAnchor3DRangeGenerator adds 1 when using the feature map sizes
+    to obtain the corners of the voxel grid. Then it shifts the coordinates to
+    the center of the voxel grid, or uses the upper-left corner, to distribute
+    anchors.
+    Args:
+        align_corner (bool): Whether to align with the corner of the voxel
+            grid. By default it is False and the anchor's center will be
+            the same as the corresponding voxel's center, which is also the
+            center of the corresponding feature grid.
+    """
-    def __init__(self, shift_center=True, **kwargs):
+    def __init__(self, align_corner=False, **kwargs):
-        super(AlignedAnchorGeneratorRange, self).__init__(**kwargs)
+        super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)
-        self.shift_center = shift_center
+        self.align_corner = align_corner
    def anchors_single_range(self,
                             feature_size,
                             anchor_range,
-                             sizes=((1.6, 3.9, 1.56), ),
+                             stride,
-                             rotations=(0, 3.1415927 / 2),
+                             sizes=[[1.6, 3.9, 1.56]],
+                             rotations=[0, 1.5707963],
                             device='cuda'):
        """Generate anchors in a single range
        Args:
@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
            anchor_range[3],
            feature_size[2] + 1,
            device=device)
-        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * self.stride
+        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * stride
        rotations = torch.tensor(rotations, device=device)
        # shift the anchor center
-        if self.shift_center:
+        if not self.align_corner:
            z_shift = (z_centers[1] - z_centers[0]) / 2
            y_shift = (y_centers[1] - y_centers[0]) / 2
            x_shift = (x_centers[1] - x_centers[0]) / 2
@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])
        # [1, 200, 176, N, 2, 7] for kitti after permute
-        # ret = ret.reshape(-1, 7)
        if len(self.custom_values) > 0:
            custom_ndim = len(self.custom_values)

--- a/mmdet3d/core/bbox/__init__.py
+++ b/mmdet3d/core/bbox/__init__.py
 from . import box_torch_ops
 from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
-from .coders import Residual3DBoxCoder
+from .coders import DeltaXYZWLHRBBoxCoder
 # from .bbox_target import bbox_target
 from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
                              bbox_overlaps_3d, bbox_overlaps_nearest_3d)
@@ -18,7 +18,7 @@ __all__ = [
    'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
    'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
    'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
-    'build_bbox_coder', 'Residual3DBoxCoder', 'boxes3d_to_bev_torch_lidar',
+    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
    'bbox_overlaps_3d'
 ]
--- a/mmdet3d/core/bbox/assign_sampling.py
+++ b/mmdet3d/core/bbox/assign_sampling.py
@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs):
 def build_bbox_coder(cfg, **kwargs):
-    if isinstance(cfg, coders.Residual3DBoxCoder):
+    if isinstance(cfg, coders.DeltaXYZWLHRBBoxCoder):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, coders, default_args=kwargs)

--- a/mmdet3d/core/bbox/coders/__init__.py
+++ b/mmdet3d/core/bbox/coders/__init__.py
-from .box_coder import Residual3DBoxCoder
+from mmdet.core.bbox import build_bbox_coder
+from .delta_xywh_bbox_coder import DeltaXYZWLHRBBoxCoder
-__all__ = ['Residual3DBoxCoder']
+__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
--- a/mmdet3d/core/bbox/coders/box_coder.py
+++ b/mmdet3d/core/bbox/coders/box_coder.py
-import numpy as np
 import torch
+from mmdet.core.bbox import BaseBBoxCoder
+from mmdet.core.bbox.builder import BBOX_CODERS
-class Residual3DBoxCoder(object):
-    def __init__(self, code_size=7, mean=None, std=None):
+@BBOX_CODERS.register_module
-        super().__init__()
+class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder):
-        self.code_size = code_size
-        self.mean = mean
-        self.std = std
-    @staticmethod
-    def encode_np(boxes, anchors):
-        """
-        :param boxes: (N, 7) x, y, z, w, l, h, r
-        :param anchors: (N, 7)
-        :return:
-        """
-        # need to convert boxes to z-center format
-        xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
-        xg, yg, zg, wg, lg, hg, rg = np.split(boxes, 7, axis=-1)
-        zg = zg + hg / 2
-        za = za + ha / 2
-        diagonal = np.sqrt(la**2 + wa**2)  # 4.3
-        xt = (xg - xa) / diagonal
-        yt = (yg - ya) / diagonal
-        zt = (zg - za) / ha  # 1.6
-        lt = np.log(lg / la)
-        wt = np.log(wg / wa)
-        ht = np.log(hg / ha)
-        rt = rg - ra
-        return np.concatenate([xt, yt, zt, wt, lt, ht, rt], axis=-1)
-    @staticmethod
-    def decode_np(box_encodings, anchors):
-        """
-        :param box_encodings: (N, 7) x, y, z, w, l, h, r
-        :param anchors: (N, 7)
-        :return:
-        """
-        # need to convert box_encodings to z-bottom format
-        xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
-        xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, 7, axis=-1)
-        za = za + ha / 2
+    def __init__(self, code_size=7):
-        diagonal = np.sqrt(la**2 + wa**2)
+        super(DeltaXYZWLHRBBoxCoder, self).__init__()
-        xg = xt * diagonal + xa
+        self.code_size = code_size
-        yg = yt * diagonal + ya
-        zg = zt * ha + za
-        lg = np.exp(lt) * la
-        wg = np.exp(wt) * wa
-        hg = np.exp(ht) * ha
-        rg = rt + ra
-        zg = zg - hg / 2
-        return np.concatenate([xg, yg, zg, wg, lg, hg, rg], axis=-1)
    @staticmethod
-    def encode_torch(anchors, boxes, means, stds):
+    def encode(anchors, boxes):
        """
        :param boxes: (N, 7+n) x, y, z, w, l, h, r, velo*
        :param anchors: (N, 7+n)
@@ -85,7 +40,7 @@ class Residual3DBoxCoder(object):
        return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)
    @staticmethod
-    def decode_torch(anchors, box_encodings, means, stds):
+    def decode(anchors, box_encodings):
        """
        :param box_encodings: (N, 7 + n) x, y, z, w, l, h, r
        :param anchors: (N, 7)

--- a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+++ b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+import torch
 from mmdet3d.ops.iou3d import boxes_iou3d_gpu
 from mmdet.core.bbox import bbox_overlaps
-from mmdet.core.bbox.iou_calculators.registry import IOU_CALCULATORS
+from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
 from .. import box_torch_ops
@@ -33,18 +35,21 @@ class BboxOverlaps3D(object):
 def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
-    '''
+    """Calculate nearest 3D IoU
-    :param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
-    :param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]?
+    Args:
-    :param mode: mode (str): "iou" (intersection over union) or iof
+        bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
+        bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]?
+        mode (str): "iou" (intersection over union) or iof
            (intersection over foreground).
-    :return: iou: (M, N) not support aligned mode currently
-    rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
+    Return:
-    '''
+        iou: (M, N); aligned mode is not supported currently
-    rbboxes1_bev = bboxes1.index_select(
+    """
-        dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
+    assert bboxes1.size(-1) == bboxes2.size(-1) == 7
-    rbboxes2_bev = bboxes2.index_select(
+    column_index1 = bboxes1.new_tensor([0, 1, 3, 4, 6], dtype=torch.long)
-        dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
+    rbboxes1_bev = bboxes1.index_select(dim=-1, index=column_index1)
+    rbboxes2_bev = bboxes2.index_select(dim=-1, index=column_index1)
    # Change the bboxes to bev
    # box conversion and iou calculation in torch version on CUDA
@@ -57,14 +62,18 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
 def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
-    '''
+    """Calculate 3D IoU using cuda implementation
-    :param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]
+    Args:
-    :param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
+        bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]
-    :param mode: mode (str): "iou" (intersection over union) or
+        bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
+        mode (str): "iou" (intersection over union) or
            iof (intersection over foreground).
-    :return: iou: (M, N) not support aligned mode currently
-    '''
+    Return:
+        iou: (M, N); aligned mode is not supported currently
+    """
    # TODO: check the input dimension meanings,
    #  this is inconsistent with that in bbox_overlaps_nearest_3d
+    assert bboxes1.size(-1) == bboxes2.size(-1) == 7
    return boxes_iou3d_gpu(bboxes1, bboxes2, mode)