Merge branch 'add-tta' into 'master'

Support test time augmentation. See merge request open-mmlab/mmdet.3d!70

Merge branch 'add-tta' into 'master'
Support test time augmentation. See merge request open-mmlab/mmdet.3d!70
ce79da2e · zhangwenwei · f6e95edd · 3c5ff9fa · ce79da2e · ce79da2e
Commit ce79da2e authored Jun 17, 2020 by zhangwenwei
20 changed files
--- a/configs/second/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/second/dv_second_secfpn_6x8_80e_kitti-3d-car.py
-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z
-model = dict(
-    type='DynamicVoxelNet',
-    voxel_layer=dict(
-        max_num_points=-1,  # max_points_per_voxel
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(-1, -1)  # (training, testing) max_coxels
-    ),
-    voxel_encoder=dict(
-        type='DynamicSimpleVFE',
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='SparseEncoder',
-        in_channels=4,
-        sparse_shape=[41, 1600, 1408],
-        order=('conv', 'norm', 'act')),
-    backbone=dict(
-        type='SECOND',
-        in_channels=256,
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        out_channels=[128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[128, 256],
-        upsample_strides=[1, 2],
-        out_channels=[256, 256]),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=1,
-        in_channels=512,
-        feat_channels=512,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
-            sizes=[[1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    assigner=dict(
-        type='MaxIoUAssigner',
-        iou_calculator=dict(type='BboxOverlapsNearest3D'),
-        pos_iou_thr=0.6,
-        neg_iou_thr=0.45,
-        min_pos_iou=0.45,
-        ignore_iof_thr=-1),
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
 # dataset settings
 dataset_type = 'KittiDataset'
 data_root = 'data/kitti/'
-class_names = ['Car']
+class_names = ['Pedestrian', 'Cyclist', 'Car']
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 input_modality = dict(use_lidar=True, use_camera=False)
 db_sampler = dict(
    data_root=data_root,
@@ -86,39 +11,72 @@ db_sampler = dict(
    object_rot_range=[0.0, 0.0],
    prepare=dict(
        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5),
+        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    ),
+    classes=class_names,
-    sample_groups=dict(Car=15),
+    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
-    classes=class_names)
+file_client_args = dict(backend='disk')
+# Uncomment the following if you use Ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
+        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
+        rot_range=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
-        type='DefaultFormatBundle3D',
+        type='LoadPointsFromFile',
-        class_names=class_names,
+        load_dim=4,
-        with_label=False),
+        use_dim=4,
-    dict(type='Collect3D', keys=['points']),
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(
@@ -157,37 +115,5 @@ data = dict(
        modality=input_modality,
        classes=class_names,
        test_mode=True))
-# optimizer
-lr = 0.0018  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-checkpoint_config = dict(interval=1)
 evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
--- a/configs/_base_/datasets/kitti-3d-car.py
+++ b/configs/_base_/datasets/kitti-3d-car.py
+# dataset settings
+dataset_type = 'KittiDataset'
+data_root = 'data/kitti/'
+class_names = ['Car']
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+input_modality = dict(use_lidar=True, use_camera=False)
+db_sampler = dict(
+    data_root=data_root,
+    info_path=data_root + 'kitti_dbinfos_train.pkl',
+    rate=1.0,
+    object_rot_range=[0.0, 0.0],
+    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
+    classes=class_names,
+    sample_groups=dict(Car=15))
+file_client_args = dict(backend='disk')
+# Uncomment the following if you use Ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
+train_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
+    dict(type='ObjectSample', db_sampler=db_sampler),
+    dict(
+        type='ObjectNoise',
+        num_try=100,
+        translation_std=[1.0, 1.0, 0.5],
+        global_rot_range=[0.0, 0.0],
+        rot_range=[-0.78539816, 0.78539816]),
+    dict(type='RandomFlip3D', flip_ratio=0.5),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
+    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='PointShuffle'),
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+test_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
+]
+data = dict(
+    samples_per_gpu=6,
+    workers_per_gpu=4,
+    train=dict(
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file=data_root + 'kitti_infos_train.pkl',
+            split='training',
+            pts_prefix='velodyne_reduced',
+            pipeline=train_pipeline,
+            modality=input_modality,
+            classes=class_names,
+            test_mode=False)),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=data_root + 'kitti_infos_val.pkl',
+        split='training',
+        pts_prefix='velodyne_reduced',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        classes=class_names,
+        test_mode=True),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=data_root + 'kitti_infos_val.pkl',
+        split='training',
+        pts_prefix='velodyne_reduced',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        classes=class_names,
+        test_mode=True))
+evaluation = dict(interval=1)
--- a/configs/_base_/datasets/nus-3d.py
+++ b/configs/_base_/datasets/nus-3d.py
+# If point cloud range is changed, the models should also change their point
+# cloud range accordingly
 point_cloud_range = [-50, -50, -5, 50, 50, 3]
+# For nuScenes we usually do 10-class detection
 class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
 ]
 dataset_type = 'NuScenesDataset'
 data_root = 'data/nuscenes/'
-file_client_args = dict(backend='disk')
+# Input modality for nuScenes dataset, this is consistent with the submission
+# format which requires the information in input_modality.
+input_modality = dict(
+    use_lidar=True,
+    use_camera=False,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if you use Ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://nuscenes/nuscenes/',
+        'data/nuscenes/': 's3://nuscenes/nuscenes/'
+    }))
 train_pipeline = [
    dict(
        type='LoadPointsFromFile',
@@ -18,10 +38,10 @@ train_pipeline = [
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.3925, 0.3925],
+        rot_range=[-0.3925, 0.3925],
-        scaling_uniform_noise=[0.95, 1.05],
+        scale_ratio_range=[0.95, 1.05],
-        trans_normal_noise=[0, 0, 0]),
+        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
@@ -39,13 +59,26 @@ test_pipeline = [
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
-        type='DefaultFormatBundle3D',
+        type='MultiScaleFlipAug3D',
-        class_names=class_names,
+        img_scale=(1333, 800),
-        with_label=False),
+        pts_scale_ratio=1,
-    dict(type='Collect3D', keys=['points'])
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(
@@ -57,6 +90,7 @@ data = dict(
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
+        modality=input_modality,
        test_mode=False),
    val=dict(
        type=dataset_type,
@@ -64,6 +98,7 @@ data = dict(
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
+        modality=input_modality,
        test_mode=True),
    test=dict(
        type=dataset_type,
@@ -71,4 +106,10 @@ data = dict(
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
+        modality=input_modality,
        test_mode=True))
+# For nuScenes dataset, we usually evaluate the model at the end of training.
+# Since the models are trained for 24 epochs by default, we set evaluation
+# interval to be 24. Please change the interval accordingly if you do not
+# use a default schedule.
+evaluation = dict(interval=24)
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -24,7 +24,7 @@ train_pipeline = [
    dict(type='IndoorPointSample', num_points=40000),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
    dict(
-        type='IndoorGlobalRotScale',
+        type='IndoorGlobalRotScaleTrans',
        shift_height=True,
        rot_range=[-1 / 36, 1 / 36],
        scale_range=None),
@@ -42,9 +42,25 @@ test_pipeline = [
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
-    dict(type='IndoorPointSample', num_points=40000),
+    dict(
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
+        type='MultiScaleFlipAug3D',
-    dict(type='Collect3D', keys=['points'])
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(type='IndoorPointSample', num_points=40000),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(

--- a/configs/_base_/datasets/sunrgbd-3d-10class.py
+++ b/configs/_base_/datasets/sunrgbd-3d-10class.py
@@ -11,7 +11,7 @@ train_pipeline = [
    dict(type='LoadAnnotations3D'),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5),
    dict(
-        type='IndoorGlobalRotScale',
+        type='IndoorGlobalRotScaleTrans',
        shift_height=True,
        rot_range=[-1 / 6, 1 / 6],
        scale_range=[0.85, 1.15]),
@@ -25,9 +25,25 @@ test_pipeline = [
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
-    dict(type='IndoorPointSample', num_points=20000),
+    dict(
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
+        type='MultiScaleFlipAug3D',
-    dict(type='Collect3D', keys=['points'])
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(type='IndoorPointSample', num_points=20000),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(

--- a/configs/_base_/default_runtime.py
+++ b/configs/_base_/default_runtime.py
 checkpoint_config = dict(interval=1)
 # yapf:disable push
+# By default we use the text logger hook and the TensorBoard logger hook.
+# For more loggers see
+# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
 log_config = dict(
    interval=50,
    hooks=[

--- a/configs/benchmark/hv_pointpillars_secfpn_6x8_80e_pcdet_kitti-3d-3class.py
+++ b/configs/benchmark/hv_pointpillars_secfpn_6x8_80e_pcdet_kitti-3d-3class.py
-# model settings
-point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
 voxel_size = [0.16, 0.16, 4]
 model = dict(
    type='VoxelNet',
    voxel_layer=dict(
-        max_num_points=32,  # max_points_per_voxel
+        max_num_points=32,
-        point_cloud_range=point_cloud_range,
+        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
        voxel_size=voxel_size,
-        max_voxels=(16000, 40000)  # (training, testing) max_coxels
+        max_voxels=(16000, 40000)),
-    ),
    voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=4,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range,
+        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
-    ),
    middle_encoder=dict(
-        type='PointPillarsScatter',
+        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-        in_channels=64,
-        output_shape=[496, 432],
-    ),
    backbone=dict(
        type='SECOND',
        in_channels=64,
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256],
+        out_channels=[64, 128, 256]),
-    ),
    neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128],
+        out_channels=[128, 128, 128]),
-    ),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
@@ -44,9 +35,9 @@ model = dict(
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
+                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
+                [0, -39.68, -1.78, 70.4, 39.68, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
@@ -61,9 +52,7 @@ model = dict(
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-    ),
-)
 # model training and testing settings
 train_cfg = dict(
    assigner=[
@@ -100,131 +89,3 @@ test_cfg = dict(
    min_bbox_size=0,
    nms_pre=100,
    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Pedestrian', 'Cyclist', 'Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(
-            Car=5,
-            Pedestrian=5,
-            Cyclist=5,
-        )),
-    classes=class_names,
-    sample_groups=dict(
-        Car=15,
-        Pedestrian=10,
-        Cyclist=10,
-    ))
-train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.1],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
-]
-test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points']),
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
-# optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is change during training
-    weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=2)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 80
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
--- a/configs/second/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+++ b/configs/second/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
-    type='DynamicVoxelNet',
+    type='VoxelNet',
    voxel_layer=dict(
-        max_num_points=-1,  # max_points_per_voxel
+        max_num_points=5,
-        point_cloud_range=point_cloud_range,
+        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
-        voxel_size=voxel_size,
+        voxel_size=[0.05, 0.05, 0.1],
-        max_voxels=(-1, -1)  # (training, testing) max_coxels
+        max_voxels=(16000, 40000)),
-    ),
+    voxel_encoder=dict(type='HardSimpleVFE'),
-    voxel_encoder=dict(
-        type='DynamicSimpleVFE',
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
@@ -47,8 +39,6 @@ model = dict(
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
-        assigner_per_size=True,
-        assign_per_class=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
@@ -95,120 +85,3 @@ test_cfg = dict(
    min_bbox_size=0,
    nms_pre=100,
    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Pedestrian', 'Cyclist', 'Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
-    classes=class_names)
-train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[0, 0, 0],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.39269908, 0.39269908]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05],
-        trans_normal_noise=[0.2, 0.2, 0.2]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
-]
-test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
-]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
-# optimizer
-lr = 0.003  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is change during training
-    weight_decay=0.001)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(
-    policy='CosineAnealing',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=1.0 / 10,
-    min_lr_ratio=1e-5)
-momentum_config = None
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl', port=29502)
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
--- a/configs/_base_/models/pointpillars_second_fpn.py
+++ b/configs/_base_/models/pointpillars_second_fpn.py
+# model settings
+# Voxel size for voxel encoder
+# Usually voxel size is changed consistently with the point cloud range
+# If point cloud range is modified, do remember to change all related
+# keys in the config.
+voxel_size = [0.25, 0.25, 8]
+model = dict(
+    type='MVXFasterRCNNV2',
+    pts_voxel_layer=dict(
+        max_num_points=64,
+        point_cloud_range=[-50, -50, -5, 50, 50, 3],
+        voxel_size=voxel_size,
+        max_voxels=(30000, 40000)),
+    pts_voxel_encoder=dict(
+        type='HardVFE',
+        in_channels=4,
+        feat_channels=[64, 64],
+        with_distance=False,
+        voxel_size=voxel_size,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        point_cloud_range=[-50, -50, -5, 50, 50, 3],
+        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
+    pts_middle_encoder=dict(
+        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
+    pts_backbone=dict(
+        type='SECOND',
+        in_channels=64,
+        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
+        layer_nums=[3, 5, 5],
+        layer_strides=[2, 2, 2],
+        out_channels=[64, 128, 256]),
+    pts_neck=dict(
+        type='FPN',
+        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
+        act_cfg=dict(type='ReLU'),
+        in_channels=[64, 128, 256],
+        out_channels=256,
+        start_level=0,
+        num_outs=3),
+    pts_bbox_head=dict(
+        type='Anchor3DHead',
+        num_classes=10,
+        in_channels=256,
+        feat_channels=256,
+        use_direction_classifier=True,
+        anchor_generator=dict(
+            type='AlignedAnchor3DRangeGenerator',
+            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
+            scales=[1, 2, 4],
+            sizes=[
+                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
+                [0.5774, 1.7321, 1.],  # 1/sqrt(3)
+                [1., 1., 1.],
+                [0.4, 0.4, 1],
+            ],
+            custom_values=[0, 0],
+            rotations=[0, 1.57],
+            reshape_out=True),
+        assigner_per_size=False,
+        diff_rad_by_sin=True,
+        dir_offset=0.7854,  # pi/4
+        dir_limit_offset=0,
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_dir=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
+# model training and testing settings
+train_cfg = dict(
+    pts=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            iou_calculator=dict(type='BboxOverlapsNearest3D'),
+            pos_iou_thr=0.6,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        allowed_border=0,
+        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    pts=dict(
+        use_rotate_nms=True,
+        nms_across_levels=False,
+        nms_pre=1000,
+        nms_thr=0.2,
+        score_thr=0.05,
+        min_bbox_size=0,
+        max_num=500))
--- a/configs/_base_/schedules/cyclic_40e.py
+++ b/configs/_base_/schedules/cyclic_40e.py
+# The schedule is usually used by models trained on KITTI dataset
+# The learning rate set in the cyclic schedule is the initial learning rate
+# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
+# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4
+lr = 0.0018
+# The optimizer follows the setting in SECOND.Pytorch, but here we use
+# the official AdamW optimizer implemented by PyTorch.
+optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
+optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
+# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
+# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69  # noqa
+# We implement them in mmcv, for more details, please refer to
+# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327  # noqa
+# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130  # noqa
+lr_config = dict(
+    policy='cyclic',
+    target_ratio=(10, 1e-4),
+    cyclic_times=1,
+    step_ratio_up=0.4,
+)
+momentum_config = dict(
+    policy='cyclic',
+    target_ratio=(0.85 / 0.95, 1),
+    cyclic_times=1,
+    step_ratio_up=0.4,
+)
+# Although the total_epochs is 40, this schedule is usually used with
+# RepeatDataset with repeat ratio N, thus the actual total epoch
+# number could be Nx40
+total_epochs = 40
--- a/configs/_base_/schedules/schedule_2x.py
+++ b/configs/_base_/schedules/schedule_2x.py
 # optimizer
+# This schedule is mainly used by models on nuScenes dataset
 optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
 # max_norm=10 is better for SECOND
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

--- a/configs/_base_/schedules/schedule_3x.py
+++ b/configs/_base_/schedules/schedule_3x.py
 # optimizer
+# This schedule is mainly used by models on indoor dataset,
+# e.g., VoteNet on SUNRGBD and ScanNet
 lr = 0.008  # max learning rate
 optimizer = dict(type='Adam', lr=lr)
 optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))

--- a/configs/benchmark/hv_pointpillars_secfpn_6x8_160e_pcdet_kitti-3d-3class.py
+++ b/configs/benchmark/hv_pointpillars_secfpn_6x8_160e_pcdet_kitti-3d-3class.py
@@ -132,14 +132,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.1],
+        translation_std=[1.0, 1.0, 0.1],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
+        rot_range=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
@@ -148,12 +148,26 @@ train_pipeline = [
 ]
 test_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
-        type='DefaultFormatBundle3D',
+        type='MultiScaleFlipAug3D',
-        class_names=class_names,
+        img_scale=(1333, 800),
-        with_label=False),
+        pts_scale_ratio=1,
-    dict(type='Collect3D', keys=['points']),
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(

--- a/configs/benchmark/hv_second_secfpn_6x8_80e_pcdet_kitti-3d-3class.py
+++ b/configs/benchmark/hv_second_secfpn_6x8_80e_pcdet_kitti-3d-3class.py
@@ -132,14 +132,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.1],
+        translation_std=[1.0, 1.0, 0.1],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
+        rot_range=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
@@ -152,12 +152,26 @@ test_pipeline = [
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
-        type='DefaultFormatBundle3D',
+        type='MultiScaleFlipAug3D',
-        class_names=class_names,
+        img_scale=(1333, 800),
-        with_label=False),
+        pts_scale_ratio=1,
-    dict(type='Collect3D', keys=['points'])
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(

--- a/configs/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+_base_ = '../pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py'
+voxel_size = [0.16, 0.16, 4]
+point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
+model = dict(
+    type='DynamicVoxelNet',
+    voxel_layer=dict(
+        max_num_points=-1,
+        point_cloud_range=point_cloud_range,
+        voxel_size=voxel_size,
+        max_voxels=(-1, -1)),
+    voxel_encoder=dict(
+        type='DynamicPillarFeatureNet',
+        in_channels=4,
+        feat_channels=[64],
+        with_distance=False,
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
--- a/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+++ b/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py'
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+voxel_size = [0.05, 0.05, 0.1]
+model = dict(
+    type='DynamicVoxelNet',
+    voxel_layer=dict(
+        _delete_=True,
+        max_num_points=-1,
+        point_cloud_range=point_cloud_range,
+        voxel_size=voxel_size,
+        max_voxels=(-1, -1)),
+    voxel_encoder=dict(
+        _delete_=True,
+        type='DynamicSimpleVFE',
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
+# optimizer
+lr = 0.003  # max learning rate
+optimizer = dict(
+    _delete_=True,
+    type='AdamW',
+    lr=lr,
+    betas=(0.95, 0.99),  # the momentum is changed during training
+    weight_decay=0.001)
+lr_config = dict(
+    _delete_=True,
+    policy='CosineAnealing',
+    warmup='linear',
+    warmup_iters=1000,
+    warmup_ratio=1.0 / 10,
+    min_lr_ratio=1e-5)
+momentum_config = None
--- a/configs/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+++ b/configs/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+voxel_size = [0.05, 0.05, 0.1]
+model = dict(
+    type='DynamicVoxelNet',
+    voxel_layer=dict(
+        _delete_=True,
+        max_num_points=-1,
+        point_cloud_range=point_cloud_range,
+        voxel_size=voxel_size,
+        max_voxels=(-1, -1)),
+    voxel_encoder=dict(
+        _delete_=True,
+        type='DynamicSimpleVFE',
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
--- a/configs/mvxnet/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+++ b/configs/mvxnet/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
@@ -158,10 +158,10 @@ train_pipeline = [
        multiscale_mode='range',
        keep_ratio=True),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
+        rot_range=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05],
+        scale_ratio_range=[0.95, 1.05],
-        trans_normal_noise=[0.2, 0.2, 0.2]),
+        translation_std=[0.2, 0.2, 0.2]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
@@ -176,24 +176,28 @@ train_pipeline = [
 test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
-        type='Resize',
+        type='MultiScaleFlipAug3D',
-        img_scale=[
+        img_scale=(1280, 384),
-            (1280, 384),
+        pts_scale_ratio=1,
-        ],
+        flip=False,
-        multiscale_mode='value',
+        transforms=[
-        keep_ratio=True),
+            dict(type='Resize', multiscale_mode='value', keep_ratio=True),
-    dict(
+            dict(
-        type='GlobalRotScale',
+                type='GlobalRotScaleTrans',
-        rot_uniform_noise=[0, 0],
+                rot_range=[0, 0],
-        scaling_uniform_noise=[1, 1]),
+                scale_ratio_range=[1., 1.],
-    dict(type='RandomFlip3D', flip_ratio=0),
+                translation_std=[0, 0, 0]),
-    dict(type='Normalize', **img_norm_cfg),
+            dict(type='RandomFlip3D'),
-    dict(type='Pad', size_divisor=32),
+            dict(type='Normalize', **img_norm_cfg),
-    dict(
+            dict(type='Pad', size_divisor=32),
-        type='DefaultFormatBundle3D',
+            dict(
-        class_names=class_names,
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-        with_label=False),
+            dict(
-    dict(type='Collect3D', keys=['points', 'img'])
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img'])
+        ])
 ]
 data = dict(

--- a/configs/mvxnet/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+++ b/configs/mvxnet/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
-# model settings
-voxel_size = [0.16, 0.16, 4]
-point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
-model = dict(
-    type='DynamicVoxelNet',
-    voxel_layer=dict(
-        max_num_points=-1,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(-1, -1)),
-    voxel_encoder=dict(
-        type='DynamicPillarFeatureNet',
-        in_channels=4,
-        feat_channels=[64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-    backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[64, 128, 256],
-        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128]),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=1,
-        in_channels=384,
-        feat_channels=384,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
-            sizes=[[1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    assigner=dict(
-        type='MaxIoUAssigner',
-        iou_calculator=dict(type='BboxOverlapsNearest3D'),
-        pos_iou_thr=0.6,
-        neg_iou_thr=0.45,
-        min_pos_iou=0.45,
-        ignore_iof_thr=-1),
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    root_path=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5),
-    ),
-    classes=class_names,
-    sample_groups=dict(Car=15))
-train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[0.25, 0.25, 0.25],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
-]
-test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
-# optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=2)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 160
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
--- a/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py
+++ b/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py
@@ -216,14 +216,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
+        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
+        type='GlobalRotScaleTrans',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
+        rot_range=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
@@ -233,12 +233,26 @@ train_pipeline = [
 ]
 test_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
-        type='DefaultFormatBundle3D',
+        type='MultiScaleFlipAug3D',
-        class_names=class_names,
+        img_scale=(1333, 800),
-        with_label=False),
+        pts_scale_ratio=1,
-    dict(type='Collect3D', keys=['points'])
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(