"model/vscode:/vscode.git/clone" did not exist on "c890011322fbdd325ef9f16e425fe1f5213a24fe"
Commit c9b69f5a authored by ZwwWayne's avatar ZwwWayne Committed by ChaimZhu
Browse files

Clean unit tests

parent a34823dc
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmdet3d.datasets import SUNRGBDDataset
def _generate_sunrgbd_dataset_config():
root_path = './tests/data/sunrgbd'
    # this test file was modified in the coordinate system refactor
ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub')
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='PointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
meta_keys=[
'file_name', 'pcd_horizontal_flip', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation'
]),
]
modality = dict(use_lidar=True, use_camera=False)
return root_path, ann_file, class_names, pipelines, modality
def _generate_sunrgbd_multi_modality_dataset_config():
root_path = './tests/data/sunrgbd'
ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub')
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
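    # Caffe-style normalization: BGR input (to_rgb=False) with mean
    # subtraction only (std of 1.0), typical for Caffe-pretrained backbones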
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='PointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
'gt_labels_3d'
])
]
modality = dict(use_lidar=True, use_camera=True)
return root_path, ann_file, class_names, pipelines, modality
def test_getitem():
from os import path as osp
np.random.seed(0)
root_path, ann_file, class_names, pipelines, modality = \
_generate_sunrgbd_dataset_config()
sunrgbd_dataset = SUNRGBDDataset(
root_path, ann_file, pipelines, modality=modality)
data = sunrgbd_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
file_name = data['img_metas']._data['file_name']
pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']
pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']
pcd_rotation = data['img_metas']._data['pcd_rotation']
sample_idx = data['img_metas']._data['sample_idx']
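    # expected rotation matrix about the z-axis for the seeded random angle
    # (cos ~0.9989, sin ~0.0470, i.e. a rotation of roughly 0.047 rad)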
pcd_rotation_expected = np.array([[0.99889565, 0.04698427, 0.],
[-0.04698427, 0.99889565, 0.],
[0., 0., 1.]])
expected_file_name = osp.join('./tests/data/sunrgbd', 'points/000001.bin')
assert file_name == expected_file_name
assert pcd_horizontal_flip is False
assert abs(pcd_scale_factor - 0.9770964398016714) < 1e-5
assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)
assert sample_idx == 1
expected_points = torch.tensor([[-0.9904, 1.2596, 0.1105, 0.0905],
[-0.9948, 1.2758, 0.0437, 0.0238],
[-0.9866, 1.2641, 0.0504, 0.0304],
[-0.9915, 1.2586, 0.1265, 0.1065],
[-0.9890, 1.2561, 0.1216, 0.1017]])
expected_gt_bboxes_3d = torch.tensor(
[[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
[2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
[-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
# coord sys refactor (rotation is correct but yaw has to be reversed)
expected_gt_bboxes_3d[:, 6:] = -expected_gt_bboxes_3d[:, 6:]
expected_gt_labels = np.array([0, 7, 6])
original_classes = sunrgbd_dataset.CLASSES
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
assert original_classes == class_names
SUNRGBD_dataset = SUNRGBDDataset(
root_path, ann_file, pipeline=None, classes=['bed', 'table'])
assert SUNRGBD_dataset.CLASSES != original_classes
assert SUNRGBD_dataset.CLASSES == ['bed', 'table']
SUNRGBD_dataset = SUNRGBDDataset(
root_path, ann_file, pipeline=None, classes=('bed', 'table'))
assert SUNRGBD_dataset.CLASSES != original_classes
assert SUNRGBD_dataset.CLASSES == ('bed', 'table')
import tempfile
with tempfile.TemporaryDirectory() as tmpdir:
        path = osp.join(tmpdir, 'classes.txt')
with open(path, 'w') as f:
f.write('bed\ntable\n')
SUNRGBD_dataset = SUNRGBDDataset(
root_path, ann_file, pipeline=None, classes=path)
assert SUNRGBD_dataset.CLASSES != original_classes
assert SUNRGBD_dataset.CLASSES == ['bed', 'table']
# test multi-modality SUN RGB-D dataset
np.random.seed(0)
root_path, ann_file, class_names, multi_modality_pipelines, modality = \
_generate_sunrgbd_multi_modality_dataset_config()
sunrgbd_dataset = SUNRGBDDataset(
root_path, ann_file, multi_modality_pipelines, modality=modality)
data = sunrgbd_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
img = data['img']._data
depth2img = data['img_metas']._data['depth2img']
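    # the SUN RGB-D calib matrices appear to be stored transposed (note the
    # principal point in the last row of the K matrix below); the fixed
    # matrix swaps depth axes into the camera frame before the intrinsics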
expected_rt_mat = np.array([[0.97959, 0.012593, -0.20061],
[0.012593, 0.99223, 0.12377],
[0.20061, -0.12377, 0.97182]])
expected_k_mat = np.array([[529.5, 0., 0.], [0., 529.5, 0.],
[365., 265., 1.]])
rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]
]) @ expected_rt_mat.transpose(1, 0)
expected_depth2img = expected_k_mat @ rt_mat
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
assert img.shape[:] == (3, 608, 832)
assert np.allclose(depth2img, expected_depth2img)
def test_evaluate():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
root_path, ann_file, _, pipelines, modality = \
_generate_sunrgbd_dataset_config()
sunrgbd_dataset = SUNRGBDDataset(
root_path, ann_file, pipelines, modality=modality)
results = []
pred_boxes = dict()
pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
torch.tensor(
[[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],
[2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],
[-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]]))
pred_boxes['labels_3d'] = torch.tensor([0, 7, 6])
pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0])
results.append(pred_boxes)
metric = [0.25, 0.5]
ap_dict = sunrgbd_dataset.evaluate(results, metric)
bed_precision_25 = ap_dict['bed_AP_0.25']
dresser_precision_25 = ap_dict['dresser_AP_0.25']
night_stand_precision_25 = ap_dict['night_stand_AP_0.25']
assert abs(bed_precision_25 - 1) < 0.01
assert abs(dresser_precision_25 - 1) < 0.01
assert abs(night_stand_precision_25 - 1) < 0.01
def test_show():
import tempfile
from os import path as osp
import mmcv
from mmdet3d.core.bbox import DepthInstance3DBoxes
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
root_path, ann_file, class_names, pipelines, modality = \
_generate_sunrgbd_dataset_config()
sunrgbd_dataset = SUNRGBDDataset(
root_path, ann_file, pipelines, modality=modality)
boxes_3d = DepthInstance3DBoxes(
torch.tensor(
[[1.1500, 4.2614, -1.0669, 1.3219, 2.1593, 1.0267, 1.6473],
[-0.9583, 2.1916, -1.0881, 0.6213, 1.3022, 1.6275, -3.0720],
[2.5697, 4.8152, -1.1157, 0.5421, 0.7019, 0.7896, 1.6712],
[0.7283, 2.5448, -1.0356, 0.7691, 0.9056, 0.5771, 1.7121],
[-0.9860, 3.2413, -1.2349, 0.5110, 0.9940, 1.1245, 0.3295]]))
scores_3d = torch.tensor(
[1.5280e-01, 1.6682e-03, 6.2811e-04, 1.2860e-03, 9.4229e-06])
labels_3d = torch.tensor([0, 0, 0, 0, 0])
result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d)
results = [result]
sunrgbd_dataset.show(results, temp_dir, show=False)
pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')
gt_file_path = osp.join(temp_dir, '000001', '000001_gt.obj')
pred_file_path = osp.join(temp_dir, '000001', '000001_pred.obj')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
tmp_dir.cleanup()
# test show with pipeline
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
sunrgbd_dataset.show(results, temp_dir, show=False, pipeline=eval_pipeline)
pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')
gt_file_path = osp.join(temp_dir, '000001', '000001_gt.obj')
pred_file_path = osp.join(temp_dir, '000001', '000001_pred.obj')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
tmp_dir.cleanup()
# test multi-modality show
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
root_path, ann_file, class_names, multi_modality_pipelines, modality = \
_generate_sunrgbd_multi_modality_dataset_config()
sunrgbd_dataset = SUNRGBDDataset(
root_path, ann_file, multi_modality_pipelines, modality=modality)
sunrgbd_dataset.show(results, temp_dir, False, multi_modality_pipelines)
pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')
gt_file_path = osp.join(temp_dir, '000001', '000001_gt.obj')
pred_file_path = osp.join(temp_dir, '000001', '000001_pred.obj')
img_file_path = osp.join(temp_dir, '000001', '000001_img.png')
img_pred_path = osp.join(temp_dir, '000001', '000001_pred.png')
img_gt_file = osp.join(temp_dir, '000001', '000001_gt.png')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
mmcv.check_file_exist(img_file_path)
mmcv.check_file_exist(img_pred_path)
mmcv.check_file_exist(img_gt_file)
tmp_dir.cleanup()
# test multi-modality show with pipeline
eval_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
]
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
sunrgbd_dataset.show(results, temp_dir, show=False, pipeline=eval_pipeline)
pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')
gt_file_path = osp.join(temp_dir, '000001', '000001_gt.obj')
pred_file_path = osp.join(temp_dir, '000001', '000001_pred.obj')
img_file_path = osp.join(temp_dir, '000001', '000001_img.png')
img_pred_path = osp.join(temp_dir, '000001', '000001_pred.png')
img_gt_file = osp.join(temp_dir, '000001', '000001_gt.png')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
mmcv.check_file_exist(img_file_path)
mmcv.check_file_exist(img_pred_path)
mmcv.check_file_exist(img_gt_file)
tmp_dir.cleanup()
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
import numpy as np
import pytest
import torch
from mmdet3d.datasets import WaymoDataset
def _generate_waymo_train_dataset_config():
data_root = 'tests/data/waymo/kitti_format/'
ann_file = 'tests/data/waymo/kitti_format/waymo_infos_train.pkl'
classes = ['Car', 'Pedestrian', 'Cyclist']
pts_prefix = 'velodyne'
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
file_client_args = dict(backend='disk')
db_sampler = dict(
data_root=data_root,
        # this test file was modified in the coordinate system refactor
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=classes,
sample_groups=dict(Car=15),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args))
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=classes),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
modality = dict(use_lidar=True, use_camera=False)
split = 'training'
return data_root, ann_file, classes, pts_prefix, pipeline, modality, split
def _generate_waymo_val_dataset_config():
data_root = 'tests/data/waymo/kitti_format/'
ann_file = 'tests/data/waymo/kitti_format/waymo_infos_val.pkl'
classes = ['Car', 'Pedestrian', 'Cyclist']
pts_prefix = 'velodyne'
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
file_client_args = dict(backend='disk')
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter',
point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=classes,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
modality = dict(use_lidar=True, use_camera=False)
split = 'training'
return data_root, ann_file, classes, pts_prefix, pipeline, modality, split
def test_getitem():
np.random.seed(0)
data_root, ann_file, classes, pts_prefix, pipeline, \
modality, split = _generate_waymo_train_dataset_config()
waymo_dataset = WaymoDataset(data_root, ann_file, split, pts_prefix,
pipeline, classes, modality)
data = waymo_dataset[0]
points = data['points']._data
gt_bboxes_3d = data['gt_bboxes_3d']._data
gt_labels_3d = data['gt_labels_3d']._data
expected_gt_bboxes_3d = torch.tensor(
[[31.8048, -0.1002, 2.1857, 6.0931, 2.3519, 3.1756, -0.1403]])
expected_gt_labels_3d = torch.tensor([0])
assert points.shape == (765, 5)
assert torch.allclose(
gt_bboxes_3d.tensor, expected_gt_bboxes_3d, atol=1e-4)
assert torch.all(gt_labels_3d == expected_gt_labels_3d)
def test_evaluate():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
from mmdet3d.core.bbox import LiDARInstance3DBoxes
data_root, ann_file, classes, pts_prefix, pipeline, \
modality, split = _generate_waymo_val_dataset_config()
waymo_dataset = WaymoDataset(data_root, ann_file, split, pts_prefix,
pipeline, classes, modality)
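    # the -np.pi / 2 yaw offset below appears to compensate for the
    # coordinate system refactor (yaw measured in the LiDAR convention)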
boxes_3d = LiDARInstance3DBoxes(
torch.tensor([[
6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00,
1.4600e+00, 9.0000e-02 - np.pi / 2
]]))
labels_3d = torch.tensor([0])
scores_3d = torch.tensor([0.5])
result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
# kitti protocol
metric = ['kitti']
ap_dict = waymo_dataset.evaluate([result], metric=metric)
assert np.isclose(ap_dict['KITTI/Overall_3D_AP11_easy'],
3.0303030303030307)
assert np.isclose(ap_dict['KITTI/Overall_3D_AP11_moderate'],
3.0303030303030307)
assert np.isclose(ap_dict['KITTI/Overall_3D_AP11_hard'],
3.0303030303030307)
# waymo protocol
metric = ['waymo']
boxes_3d = LiDARInstance3DBoxes(
torch.tensor([[
6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00,
1.4600e+00, 9.0000e-02 - np.pi / 2
]]))
labels_3d = torch.tensor([0])
scores_3d = torch.tensor([0.8])
result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
ap_dict = waymo_dataset.evaluate([result], metric=metric)
assert np.isclose(ap_dict['Overall/L1 mAP'], 0.3333333333333333)
assert np.isclose(ap_dict['Overall/L2 mAP'], 0.3333333333333333)
assert np.isclose(ap_dict['Overall/L1 mAPH'], 0.3333333333333333)
assert np.isclose(ap_dict['Overall/L2 mAPH'], 0.3333333333333333)
def test_show():
from os import path as osp
import mmcv
from mmdet3d.core.bbox import LiDARInstance3DBoxes
    # Waymo shares its show function with KITTI, so this test mirrors the
    # KITTI one
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
data_root, ann_file, classes, pts_prefix, pipeline, \
modality, split = _generate_waymo_val_dataset_config()
waymo_dataset = WaymoDataset(
data_root, ann_file, split=split, modality=modality, pipeline=pipeline)
boxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[46.1218, -4.6496, -0.9275, 1.4442, 0.5316, 1.7450, 1.1749],
[33.3189, 0.1981, 0.3136, 1.2301, 0.5656, 1.7985, 1.5723],
[46.1366, -4.6404, -0.9510, 1.6501, 0.5162, 1.7540, 1.3778],
[33.2646, 0.2297, 0.3446, 1.3365, 0.5746, 1.7947, 1.5430],
[58.9079, 16.6272, -1.5829, 3.9313, 1.5656, 1.4899, 1.5505]]))
scores_3d = torch.tensor([0.1815, 0.1663, 0.5792, 0.2194, 0.2780])
labels_3d = torch.tensor([0, 0, 1, 1, 2])
result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d)
results = [result]
waymo_dataset.show(results, temp_dir, show=False)
pts_file_path = osp.join(temp_dir, '1000000', '1000000_points.obj')
gt_file_path = osp.join(temp_dir, '1000000', '1000000_gt.obj')
pred_file_path = osp.join(temp_dir, '1000000', '1000000_pred.obj')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
tmp_dir.cleanup()
# test show with pipeline
tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5),
dict(
type='DefaultFormatBundle3D',
class_names=classes,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
waymo_dataset.show(results, temp_dir, show=False, pipeline=eval_pipeline)
pts_file_path = osp.join(temp_dir, '1000000', '1000000_points.obj')
gt_file_path = osp.join(temp_dir, '1000000', '1000000_gt.obj')
pred_file_path = osp.join(temp_dir, '1000000', '1000000_pred.obj')
mmcv.check_file_exist(pts_file_path)
mmcv.check_file_exist(gt_file_path)
mmcv.check_file_exist(pred_file_path)
tmp_dir.cleanup()
def test_format_results():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
from mmdet3d.core.bbox import LiDARInstance3DBoxes
data_root, ann_file, classes, pts_prefix, pipeline, \
modality, split = _generate_waymo_val_dataset_config()
waymo_dataset = WaymoDataset(data_root, ann_file, split, pts_prefix,
pipeline, classes, modality)
boxes_3d = LiDARInstance3DBoxes(
torch.tensor([[
6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00,
1.4600e+00, 9.0000e-02 - np.pi / 2
]]))
labels_3d = torch.tensor([0])
scores_3d = torch.tensor([0.5])
result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
result_files, tmp_dir = waymo_dataset.format_results([result],
data_format='waymo')
expected_name = np.array(['Car'])
expected_truncated = np.array([0.])
expected_occluded = np.array([0])
expected_alpha = np.array([0.35619745])
expected_bbox = np.array([[0., 673.59814, 37.07779, 719.7537]])
expected_dimensions = np.array([[4.36, 1.46, 2.01]])
expected_location = np.array([[-33.000042, 2.4999967, 68.29972]])
expected_rotation_y = np.array([-0.09])
expected_score = np.array([0.5])
expected_sample_idx = np.array([1000000])
assert np.all(result_files[0]['name'] == expected_name)
assert np.allclose(result_files[0]['truncated'], expected_truncated)
assert np.all(result_files[0]['occluded'] == expected_occluded)
assert np.allclose(result_files[0]['bbox'], expected_bbox, 1e-3)
assert np.allclose(result_files[0]['dimensions'], expected_dimensions)
assert np.allclose(result_files[0]['location'], expected_location)
assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y)
assert np.allclose(result_files[0]['score'], expected_score)
assert np.allclose(result_files[0]['sample_idx'], expected_sample_idx)
assert np.allclose(result_files[0]['alpha'], expected_alpha)
tmp_dir.cleanup()
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmdet3d.datasets.pipelines.data_augment_utils import (
noise_per_object_v3_, points_transform_)
def test_noise_per_object_v3_():
np.random.seed(0)
points = np.fromfile(
'./tests/data/kitti/training/velodyne_reduced/000000.bin',
np.float32).reshape(-1, 4)
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
annos = info['annos']
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
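    # the trailing underscore marks an in-place op: the boxes and the points
    # inside them are perturbed directly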
noise_per_object_v3_(gt_boxes=gt_bboxes_3d, points=points)
expected_gt_bboxes_3d = np.array(
[[3.3430212, 2.1475432, 9.388738, 1.2, 1.89, 0.48, 0.05056486]])
assert points.shape == (800, 4)
assert np.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)
def test_points_transform():
points = np.array([[46.5090, 6.1140, -0.7790, 0.0000],
[42.9490, 6.4050, -0.7050, 0.0000],
[42.9010, 6.5360, -0.7050, 0.0000],
[46.1960, 6.0960, -1.0100, 0.0000],
[43.3080, 6.2680, -0.9360, 0.0000]])
gt_boxes = np.array([[
1.5340e+01, 8.4691e+00, -1.6855e+00, 1.6400e+00, 3.7000e+00,
1.4900e+00, 3.1300e+00
],
[
1.7999e+01, 8.2386e+00, -1.5802e+00, 1.5500e+00,
4.0200e+00, 1.5200e+00, 3.1300e+00
],
[
2.9620e+01, 8.2617e+00, -1.6185e+00, 1.7800e+00,
4.2500e+00, 1.9000e+00, -3.1200e+00
],
[
4.8218e+01, 7.8035e+00, -1.3790e+00, 1.6400e+00,
3.7000e+00, 1.5200e+00, -1.0000e-02
],
[
3.3079e+01, -8.4817e+00, -1.3092e+00, 4.3000e-01,
1.7000e+00, 1.6200e+00, -1.5700e+00
]])
point_masks = np.array([[False, False, False, False, False],
[False, False, False, False, False],
[False, False, False, False, False],
[False, False, False, False, False],
[False, False, False, False, False]])
loc_transforms = np.array([[-1.8635, -0.2774, -0.1774],
[-1.0297, -1.0302, -0.3062],
[1.6680, 0.2597, 0.0551],
[0.2230, 0.7257, -0.0097],
[-0.1403, 0.8300, 0.3431]])
rot_transforms = np.array([0.6888, -0.3858, 0.1910, -0.0044, -0.0036])
valid_mask = np.array([True, True, True, True, True])
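    # points_transform_ applies the per-box translation/rotation noise to
    # the points selected by point_masks, modifying the inputs in place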
points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,
rot_transforms, valid_mask)
assert points.shape == (5, 4)
assert gt_boxes.shape == (5, 7)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmdet3d.core.points import DepthPoints
from mmdet3d.datasets.pipelines import MultiScaleFlipAug3D
def test_multi_scale_flip_aug_3D():
np.random.seed(0)
transforms = [{
'type': 'GlobalRotScaleTrans',
'rot_range': [-0.1, 0.1],
'scale_ratio_range': [0.9, 1.1],
'translation_std': [0, 0, 0]
}, {
'type': 'RandomFlip3D',
'sync_2d': False,
'flip_ratio_bev_horizontal': 0.5
}, {
'type': 'PointSample',
'num_points': 5
}, {
        'type': 'DefaultFormatBundle3D',
        'class_names': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                        'dresser', 'night_stand', 'bookshelf', 'bathtub'),
        'with_label': False
}, {
'type': 'Collect3D',
'keys': ['points']
}]
img_scale = (1333, 800)
pts_scale_ratio = 1
multi_scale_flip_aug_3D = MultiScaleFlipAug3D(transforms, img_scale,
pts_scale_ratio)
pts_file_name = 'tests/data/sunrgbd/points/000001.bin'
sample_idx = 4
file_name = 'tests/data/sunrgbd/points/000001.bin'
bbox3d_fields = []
points = np.array([[0.20397437, 1.4267826, -1.0503972, 0.16195858],
[-2.2095256, 3.3159535, -0.7706928, 0.4416629],
[1.5090443, 3.2764456, -1.1913797, 0.02097607],
[-1.373904, 3.8711405, 0.8524302, 2.064786],
[-1.8139812, 3.538856, -1.0056694, 0.20668638]])
points = DepthPoints(points, points_dim=4, attribute_dims=dict(height=3))
results = dict(
points=points,
pts_file_name=pts_file_name,
sample_idx=sample_idx,
file_name=file_name,
bbox3d_fields=bbox3d_fields)
results = multi_scale_flip_aug_3D(results)
expected_points = torch.tensor(
[[-2.2418, 3.2942, -0.7707, 0.4417], [-1.4116, 3.8575, 0.8524, 2.0648],
[-1.8484, 3.5210, -1.0057, 0.2067], [0.1900, 1.4287, -1.0504, 0.1620],
[1.4770, 3.2910, -1.1914, 0.0210]],
dtype=torch.float32)
assert torch.allclose(
results['points'][0]._data, expected_points, atol=1e-4)
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
import pytest
import torch
from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.bbox import Coord3DMode
from mmdet3d.core.points import DepthPoints, LiDARPoints
# yapf: disable
from mmdet3d.datasets import (AffineResize, BackgroundPointsFilter,
GlobalAlignment, GlobalRotScaleTrans,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints,
RandomShiftScale, VoxelBasedPointSampler)
def test_remove_points_in_boxes():
points = np.array([[68.1370, 3.3580, 2.5160, 0.0000],
[67.6970, 3.5500, 2.5010, 0.0000],
[67.6490, 3.7600, 2.5000, 0.0000],
[66.4140, 3.9010, 2.4590, 0.0000],
[66.0120, 4.0850, 2.4460, 0.0000],
[65.8340, 4.1780, 2.4400, 0.0000],
[65.8410, 4.3860, 2.4400, 0.0000],
[65.7450, 4.5870, 2.4380, 0.0000],
[65.5510, 4.7800, 2.4320, 0.0000],
[65.4860, 4.9820, 2.4300, 0.0000]])
boxes = np.array(
[[30.0285, 10.5110, -1.5304, 0.5100, 0.8700, 1.6000, 1.6400],
[7.8369, 1.6053, -1.5605, 0.5800, 1.2300, 1.8200, -3.1000],
[10.8740, -1.0827, -1.3310, 0.6000, 0.5200, 1.7100, 1.3500],
[14.9783, 2.2466, -1.4950, 0.6100, 0.7300, 1.5300, -1.9200],
[11.0656, 0.6195, -1.5202, 0.6600, 1.0100, 1.7600, -1.4600],
[10.5994, -7.9049, -1.4980, 0.5300, 1.9600, 1.6800, 1.5600],
[28.7068, -8.8244, -1.1485, 0.6500, 1.7900, 1.7500, 3.1200],
[20.2630, 5.1947, -1.4799, 0.7300, 1.7600, 1.7300, 1.5100],
[18.2496, 3.1887, -1.6109, 0.5600, 1.6800, 1.7100, 1.5600],
[7.7396, -4.3245, -1.5801, 0.5600, 1.7900, 1.8000, -0.8300]])
points = LiDARPoints(points, points_dim=4)
points = ObjectSample.remove_points_in_boxes(points, boxes)
assert points.tensor.numpy().shape == (10, 4)
def test_object_sample():
db_sampler = mmcv.ConfigDict({
'data_root': './tests/data/kitti/',
'info_path': './tests/data/kitti/kitti_dbinfos_train.pkl',
'rate': 1.0,
'prepare': {
'filter_by_difficulty': [-1],
'filter_by_min_points': {
'Pedestrian': 10
}
},
'classes': ['Pedestrian', 'Cyclist', 'Car'],
'sample_groups': {
'Pedestrian': 6
}
})
np.random.seed(0)
object_sample = ObjectSample(db_sampler)
points = np.fromfile(
'./tests/data/kitti/training/velodyne_reduced/000000.bin',
np.float32).reshape(-1, 4)
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
annos = info['annos']
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_names = annos['name']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
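    # KITTI annotations live in the rectified camera frame; the inverse of
    # (R0_rect @ Tr_velo_to_cam) maps them back into the LiDAR frame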
CLASSES = ('Pedestrian', 'Cyclist', 'Car')
gt_labels = []
for cat in gt_names:
if cat in CLASSES:
gt_labels.append(CLASSES.index(cat))
else:
gt_labels.append(-1)
gt_labels = np.array(gt_labels, dtype=np.int64)
points = LiDARPoints(points, points_dim=4)
input_dict = dict(
points=points, gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels)
input_dict = object_sample(input_dict)
points = input_dict['points']
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
repr_str = repr(object_sample)
expected_repr_str = 'ObjectSample sample_2d=False, ' \
'data_root=./tests/data/kitti/, ' \
'info_path=./tests/data/kitti/kitti' \
'_dbinfos_train.pkl, rate=1.0, ' \
'prepare={\'filter_by_difficulty\': [-1], ' \
'\'filter_by_min_points\': {\'Pedestrian\': 10}}, ' \
'classes=[\'Pedestrian\', \'Cyclist\', \'Car\'], ' \
'sample_groups={\'Pedestrian\': 6}'
assert repr_str == expected_repr_str
assert points.tensor.numpy().shape == (800, 4)
assert gt_bboxes_3d.tensor.shape == (1, 7)
assert np.all(gt_labels_3d == [0])
def test_object_noise():
np.random.seed(0)
object_noise = ObjectNoise()
points = np.fromfile(
'./tests/data/kitti/training/velodyne_reduced/000000.bin',
np.float32).reshape(-1, 4)
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
annos = info['annos']
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
points = LiDARPoints(points, points_dim=4)
input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)
input_dict = object_noise(input_dict)
points = input_dict['points']
gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor
    # coordinate system refactor (lidar2cam): the expected yaw is converted
    # to the new convention
expected_gt_bboxes_3d = torch.tensor([[
9.1724, -1.7559, -1.3550, 1.2000, 0.4800, 1.8900,
0.0505 - float(rots) * 2 - np.pi / 2
]])
repr_str = repr(object_noise)
expected_repr_str = 'ObjectNoise(num_try=100, ' \
'translation_std=[0.25, 0.25, 0.25], ' \
'global_rot_range=[0.0, 0.0], ' \
'rot_range=[-0.15707963267, 0.15707963267])'
assert repr_str == expected_repr_str
assert points.tensor.numpy().shape == (800, 4)
assert torch.allclose(gt_bboxes_3d, expected_gt_bboxes_3d, 1e-3)
def test_object_name_filter():
class_names = ['Pedestrian']
object_name_filter = ObjectNameFilter(class_names)
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
annos = info['annos']
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_names = annos['name']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
CLASSES = ('Pedestrian', 'Cyclist', 'Car')
gt_labels = []
for cat in gt_names:
if cat in CLASSES:
gt_labels.append(CLASSES.index(cat))
else:
gt_labels.append(-1)
gt_labels = np.array(gt_labels, dtype=np.int64)
input_dict = dict(
gt_bboxes_3d=gt_bboxes_3d.clone(), gt_labels_3d=gt_labels.copy())
results = object_name_filter(input_dict)
bboxes_3d = results['gt_bboxes_3d']
labels_3d = results['gt_labels_3d']
keep_mask = np.array([name in class_names for name in gt_names])
assert torch.allclose(gt_bboxes_3d.tensor[keep_mask], bboxes_3d.tensor)
assert np.all(gt_labels[keep_mask] == labels_3d)
repr_str = repr(object_name_filter)
expected_repr_str = f'ObjectNameFilter(classes={class_names})'
assert repr_str == expected_repr_str
def test_point_shuffle():
np.random.seed(0)
torch.manual_seed(0)
point_shuffle = PointShuffle()
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)
ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
np.int64)
sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
np.int64)
points = DepthPoints(
points.copy(), points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
input_dict = dict(
points=points.clone(),
pts_instance_mask=ins_mask.copy(),
pts_semantic_mask=sem_mask.copy())
results = point_shuffle(input_dict)
shuffle_pts = results['points']
shuffle_ins_mask = results['pts_instance_mask']
shuffle_sem_mask = results['pts_semantic_mask']
shuffle_idx = np.array([
44, 19, 93, 90, 71, 69, 37, 95, 53, 91, 81, 42, 80, 85, 74, 56, 76, 63,
82, 40, 26, 92, 57, 10, 16, 66, 89, 41, 97, 8, 31, 24, 35, 30, 65, 7,
98, 23, 20, 29, 78, 61, 94, 15, 4, 52, 59, 5, 54, 46, 3, 28, 2, 70, 6,
60, 49, 68, 55, 72, 79, 77, 45, 1, 32, 34, 11, 0, 22, 12, 87, 50, 25,
47, 36, 96, 9, 83, 62, 84, 18, 17, 75, 67, 13, 48, 39, 21, 64, 88, 38,
27, 14, 73, 33, 58, 86, 43, 99, 51
])
expected_pts = points.tensor.numpy()[shuffle_idx]
expected_ins_mask = ins_mask[shuffle_idx]
expected_sem_mask = sem_mask[shuffle_idx]
assert np.allclose(shuffle_pts.tensor.numpy(), expected_pts)
assert np.all(shuffle_ins_mask == expected_ins_mask)
assert np.all(shuffle_sem_mask == expected_sem_mask)
repr_str = repr(point_shuffle)
expected_repr_str = 'PointShuffle'
assert repr_str == expected_repr_str
def test_points_range_filter():
pcd_range = [0.0, 0.0, 0.0, 3.0, 3.0, 3.0]
points_range_filter = PointsRangeFilter(pcd_range)
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)
ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
np.int64)
sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
np.int64)
points = DepthPoints(
points.copy(), points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
input_dict = dict(
points=points.clone(),
pts_instance_mask=ins_mask.copy(),
pts_semantic_mask=sem_mask.copy())
results = points_range_filter(input_dict)
shuffle_pts = results['points']
shuffle_ins_mask = results['pts_instance_mask']
shuffle_sem_mask = results['pts_semantic_mask']
select_idx = np.array(
[5, 11, 22, 26, 27, 33, 46, 47, 56, 63, 74, 78, 79, 91])
expected_pts = points.tensor.numpy()[select_idx]
expected_ins_mask = ins_mask[select_idx]
expected_sem_mask = sem_mask[select_idx]
assert np.allclose(shuffle_pts.tensor.numpy(), expected_pts)
assert np.all(shuffle_ins_mask == expected_ins_mask)
assert np.all(shuffle_sem_mask == expected_sem_mask)
repr_str = repr(points_range_filter)
expected_repr_str = f'PointsRangeFilter(point_cloud_range={pcd_range})'
assert repr_str == expected_repr_str
def test_object_range_filter():
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
object_range_filter = ObjectRangeFilter(point_cloud_range)
bbox = np.array(
[[8.7314, -1.8559, -0.6547, 0.4800, 1.2000, 1.8900, 0.0100],
[28.7314, -18.559, 0.6547, 2.4800, 1.6000, 1.9200, 5.0100],
[-2.54, -1.8559, -0.6547, 0.4800, 1.2000, 1.8900, 0.0100],
[72.7314, -18.559, 0.6547, 6.4800, 11.6000, 4.9200, -0.0100],
[18.7314, -18.559, 20.6547, 6.4800, 8.6000, 3.9200, -1.0100],
[3.7314, 42.559, -0.6547, 6.4800, 8.6000, 2.9200, 3.0100]])
gt_bboxes_3d = LiDARInstance3DBoxes(bbox, origin=(0.5, 0.5, 0.5))
gt_labels_3d = np.array([0, 2, 1, 1, 2, 0], dtype=np.int64)
input_dict = dict(
gt_bboxes_3d=gt_bboxes_3d.clone(), gt_labels_3d=gt_labels_3d.copy())
results = object_range_filter(input_dict)
bboxes_3d = results['gt_bboxes_3d']
labels_3d = results['gt_labels_3d']
keep_mask = np.array([True, True, False, False, True, False])
expected_bbox = gt_bboxes_3d.tensor[keep_mask]
expected_bbox[1, 6] -= 2 * np.pi # limit yaw
assert torch.allclose(expected_bbox, bboxes_3d.tensor)
assert np.all(gt_labels_3d[keep_mask] == labels_3d)
repr_str = repr(object_range_filter)
expected_repr_str = 'ObjectRangeFilter(point_cloud_range=' \
'[0.0, -40.0, -3.0, 70.4000015258789, 40.0, 1.0])'
assert repr_str == expected_repr_str
def test_global_alignment():
np.random.seed(0)
global_alignment = GlobalAlignment(rotation_axis=2)
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)
annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
info = annos[0]
axis_align_matrix = info['annos']['axis_align_matrix']
depth_points = DepthPoints(points.copy(), points_dim=6)
input_dict = dict(
points=depth_points.clone(),
ann_info=dict(axis_align_matrix=axis_align_matrix))
input_dict = global_alignment(input_dict)
trans_depth_points = input_dict['points']
# construct expected transformed points by affine transformation
pts = np.ones((points.shape[0], 4))
pts[:, :3] = points[:, :3]
trans_pts = np.dot(pts, axis_align_matrix.T)
expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]], axis=1)
assert np.allclose(
trans_depth_points.tensor.numpy(), expected_points, atol=1e-6)
repr_str = repr(global_alignment)
expected_repr_str = 'GlobalAlignment(rotation_axis=2)'
assert repr_str == expected_repr_str
def test_global_rot_scale_trans():
angle = 0.78539816
scale = [0.95, 1.05]
trans_std = 1.0
# rot_range should be a number or seq of numbers
with pytest.raises(AssertionError):
global_rot_scale_trans = GlobalRotScaleTrans(rot_range='0.0')
# scale_ratio_range should be seq of numbers
with pytest.raises(AssertionError):
global_rot_scale_trans = GlobalRotScaleTrans(scale_ratio_range=1.0)
# translation_std should be a positive number or seq of positive numbers
with pytest.raises(AssertionError):
global_rot_scale_trans = GlobalRotScaleTrans(translation_std='0.0')
with pytest.raises(AssertionError):
global_rot_scale_trans = GlobalRotScaleTrans(translation_std=-1.0)
global_rot_scale_trans = GlobalRotScaleTrans(
rot_range=angle,
scale_ratio_range=scale,
translation_std=trans_std,
shift_height=False)
np.random.seed(0)
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)
annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
info = annos[0]
gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']
depth_points = DepthPoints(
points.copy(), points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
gt_bboxes_3d = DepthInstance3DBoxes(
gt_bboxes_3d.copy(),
box_dim=gt_bboxes_3d.shape[-1],
with_yaw=False,
origin=(0.5, 0.5, 0.5))
input_dict = dict(
points=depth_points.clone(),
bbox3d_fields=['gt_bboxes_3d'],
gt_bboxes_3d=gt_bboxes_3d.clone())
input_dict = global_rot_scale_trans(input_dict)
trans_depth_points = input_dict['points']
trans_bboxes_3d = input_dict['gt_bboxes_3d']
noise_rot = 0.07667607233534723
scale_factor = 1.021518936637242
trans_factor = np.array([0.97873798, 2.2408932, 1.86755799])
true_depth_points = depth_points.clone()
true_bboxes_3d = gt_bboxes_3d.clone()
true_depth_points, noise_rot_mat_T = true_bboxes_3d.rotate(
noise_rot, true_depth_points)
true_bboxes_3d.scale(scale_factor)
true_bboxes_3d.translate(trans_factor)
true_depth_points.scale(scale_factor)
true_depth_points.translate(trans_factor)
assert torch.allclose(
trans_depth_points.tensor, true_depth_points.tensor, atol=1e-6)
assert torch.allclose(
trans_bboxes_3d.tensor, true_bboxes_3d.tensor, atol=1e-6)
assert input_dict['pcd_scale_factor'] == scale_factor
assert torch.allclose(
input_dict['pcd_rotation'], noise_rot_mat_T, atol=1e-6)
assert np.allclose(input_dict['pcd_trans'], trans_factor)
repr_str = repr(global_rot_scale_trans)
expected_repr_str = f'GlobalRotScaleTrans(rot_range={[-angle, angle]},' \
f' scale_ratio_range={scale},' \
f' translation_std={[trans_std for _ in range(3)]},' \
f' shift_height=False)'
assert repr_str == expected_repr_str
# points with shift_height but no bbox
global_rot_scale_trans = GlobalRotScaleTrans(
rot_range=angle,
scale_ratio_range=scale,
translation_std=trans_std,
shift_height=True)
# points should have height attribute when shift_height=True
with pytest.raises(AssertionError):
input_dict = global_rot_scale_trans(input_dict)
np.random.seed(0)
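    # give the points a height attribute (column 6 below) so that
    # shift_height=True has something to scale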
shift_height = points[:, 2:3] * 0.99
points = np.concatenate([points, shift_height], axis=1)
depth_points = DepthPoints(
points.copy(),
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
input_dict = dict(points=depth_points.clone(), bbox3d_fields=[])
input_dict = global_rot_scale_trans(input_dict)
trans_depth_points = input_dict['points']
true_shift_height = shift_height * scale_factor
assert np.allclose(
trans_depth_points.tensor.numpy(),
np.concatenate([true_depth_points.tensor.numpy(), true_shift_height],
axis=1),
atol=1e-6)
def test_random_drop_points_color():
# drop_ratio should be in [0, 1]
with pytest.raises(AssertionError):
random_drop_points_color = RandomDropPointsColor(drop_ratio=1.1)
# 100% drop
random_drop_points_color = RandomDropPointsColor(drop_ratio=1)
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)
depth_points = DepthPoints(
points.copy(), points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
input_dict = dict(points=depth_points.clone())
input_dict = random_drop_points_color(input_dict)
trans_depth_points = input_dict['points']
trans_color = trans_depth_points.color
assert torch.all(trans_color == trans_color.new_zeros(trans_color.shape))
# 0% drop
random_drop_points_color = RandomDropPointsColor(drop_ratio=0)
input_dict = dict(points=depth_points.clone())
input_dict = random_drop_points_color(input_dict)
trans_depth_points = input_dict['points']
trans_color = trans_depth_points.color
assert torch.allclose(trans_color, depth_points.tensor[:, 3:6])
random_drop_points_color = RandomDropPointsColor(drop_ratio=0.5)
repr_str = repr(random_drop_points_color)
expected_repr_str = 'RandomDropPointsColor(drop_ratio=0.5)'
assert repr_str == expected_repr_str
def test_random_flip_3d():
random_flip_3d = RandomFlip3D(
flip_ratio_bev_horizontal=1.0, flip_ratio_bev_vertical=1.0)
points = np.array([[22.7035, 9.3901, -0.2848, 0.0000],
[21.9826, 9.1766, -0.2698, 0.0000],
[21.4329, 9.0209, -0.2578, 0.0000],
[21.3068, 9.0205, -0.2558, 0.0000],
[21.3400, 9.1305, -0.2578, 0.0000],
[21.3291, 9.2099, -0.2588, 0.0000],
[21.2759, 9.2599, -0.2578, 0.0000],
[21.2686, 9.2982, -0.2588, 0.0000],
[21.2334, 9.3607, -0.2588, 0.0000],
[21.2179, 9.4372, -0.2598, 0.0000]])
bbox3d_fields = ['gt_bboxes_3d']
img_fields = []
box_type_3d = LiDARInstance3DBoxes
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[38.9229, 18.4417, -1.1459, 0.7100, 1.7600, 1.8600, -2.2652],
[12.7768, 0.5795, -2.2682, 0.5700, 0.9900, 1.7200, -2.5029],
[12.7557, 2.2996, -1.4869, 0.6100, 1.1100, 1.9000, -1.9390],
[10.6677, 0.8064, -1.5435, 0.7900, 0.9600, 1.7900, 1.0856],
[5.0903, 5.1004, -1.2694, 0.7100, 1.7000, 1.8300, -1.9136]]))
points = LiDARPoints(points, points_dim=4)
input_dict = dict(
points=points,
bbox3d_fields=bbox3d_fields,
box_type_3d=box_type_3d,
img_fields=img_fields,
gt_bboxes_3d=gt_bboxes_3d)
input_dict = random_flip_3d(input_dict)
points = input_dict['points'].tensor.numpy()
gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor
expected_points = np.array([[22.7035, -9.3901, -0.2848, 0.0000],
[21.9826, -9.1766, -0.2698, 0.0000],
[21.4329, -9.0209, -0.2578, 0.0000],
[21.3068, -9.0205, -0.2558, 0.0000],
[21.3400, -9.1305, -0.2578, 0.0000],
[21.3291, -9.2099, -0.2588, 0.0000],
[21.2759, -9.2599, -0.2578, 0.0000],
[21.2686, -9.2982, -0.2588, 0.0000],
[21.2334, -9.3607, -0.2588, 0.0000],
[21.2179, -9.4372, -0.2598, 0.0000]])
expected_gt_bboxes_3d = torch.tensor(
[[38.9229, -18.4417, -1.1459, 0.7100, 1.7600, 1.8600, 2.2652],
[12.7768, -0.5795, -2.2682, 0.5700, 0.9900, 1.7200, 2.5029],
[12.7557, -2.2996, -1.4869, 0.6100, 1.1100, 1.9000, 1.9390],
[10.6677, -0.8064, -1.5435, 0.7900, 0.9600, 1.7900, -1.0856],
[5.0903, -5.1004, -1.2694, 0.7100, 1.7000, 1.8300, 1.9136]])
repr_str = repr(random_flip_3d)
expected_repr_str = 'RandomFlip3D(sync_2d=True,' \
' flip_ratio_bev_vertical=1.0)'
assert np.allclose(points, expected_points)
assert torch.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)
assert repr_str == expected_repr_str
def test_random_jitter_points():
# jitter_std should be a number or seq of numbers
with pytest.raises(AssertionError):
random_jitter_points = RandomJitterPoints(jitter_std='0.0')
# clip_range should be a number or seq of numbers
with pytest.raises(AssertionError):
random_jitter_points = RandomJitterPoints(clip_range='0.0')
random_jitter_points = RandomJitterPoints(jitter_std=0.01, clip_range=0.05)
np.random.seed(0)
points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
np.float32).reshape(-1, 6)[:10]
depth_points = DepthPoints(
points.copy(), points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
input_dict = dict(points=depth_points.clone())
input_dict = random_jitter_points(input_dict)
trans_depth_points = input_dict['points']
jitter_noise = np.array([[0.01764052, 0.00400157, 0.00978738],
[0.02240893, 0.01867558, -0.00977278],
[0.00950088, -0.00151357, -0.00103219],
[0.00410598, 0.00144044, 0.01454273],
[0.00761038, 0.00121675, 0.00443863],
[0.00333674, 0.01494079, -0.00205158],
[0.00313068, -0.00854096, -0.0255299],
[0.00653619, 0.00864436, -0.00742165],
[0.02269755, -0.01454366, 0.00045759],
[-0.00187184, 0.01532779, 0.01469359]])
trans_depth_points = trans_depth_points.tensor.numpy()
expected_depth_points = points
expected_depth_points[:, :3] += jitter_noise
assert np.allclose(trans_depth_points, expected_depth_points)
repr_str = repr(random_jitter_points)
jitter_std = [0.01, 0.01, 0.01]
clip_range = [-0.05, 0.05]
expected_repr_str = f'RandomJitterPoints(jitter_std={jitter_std},' \
f' clip_range={clip_range})'
assert repr_str == expected_repr_str
# test clipping very large noise
random_jitter_points = RandomJitterPoints(jitter_std=1.0, clip_range=0.05)
input_dict = dict(points=depth_points.clone())
input_dict = random_jitter_points(input_dict)
trans_depth_points = input_dict['points']
assert (trans_depth_points.tensor - depth_points.tensor).max().item() <= \
0.05 + 1e-6
assert (trans_depth_points.tensor - depth_points.tensor).min().item() >= \
-0.05 - 1e-6
def test_background_points_filter():
np.random.seed(0)
background_points_filter = BackgroundPointsFilter((0.5, 2.0, 0.5))
points = np.fromfile(
'./tests/data/kitti/training/velodyne_reduced/000000.bin',
np.float32).reshape(-1, 4)
orig_points = points.copy()
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
annos = info['annos']
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
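    # the four corner-derived points added below sit just above a gt box;
    # they fall inside the enlarged-box region and should be filtered out,
    # restoring the original 800 points (checked by the asserts below)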
extra_points = gt_bboxes_3d.corners.reshape(8, 3)[[1, 2, 5, 6], :]
extra_points[:, 2] += 0.1
extra_points = torch.cat([extra_points, extra_points.new_zeros(4, 1)], 1)
points = np.concatenate([points, extra_points.numpy()], 0)
points = LiDARPoints(points, points_dim=4)
input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)
origin_gt_bboxes_3d = gt_bboxes_3d.clone()
input_dict = background_points_filter(input_dict)
points = input_dict['points'].tensor.numpy()
repr_str = repr(background_points_filter)
expected_repr_str = 'BackgroundPointsFilter(bbox_enlarge_range=' \
'[[0.5, 2.0, 0.5]])'
assert repr_str == expected_repr_str
assert points.shape == (800, 4)
assert np.equal(orig_points, points).all()
assert np.equal(input_dict['gt_bboxes_3d'].tensor.numpy(),
origin_gt_bboxes_3d.tensor.numpy()).all()
# test single float config
BackgroundPointsFilter(0.5)
# The length of bbox_enlarge_range should be 3
with pytest.raises(AssertionError):
BackgroundPointsFilter((0.5, 2.0))
def test_voxel_based_point_filter():
np.random.seed(0)
cur_sweep_cfg = dict(
voxel_size=[0.1, 0.1, 0.1],
point_cloud_range=[-50, -50, -4, 50, 50, 2],
max_num_points=1,
max_voxels=1024)
prev_sweep_cfg = dict(
voxel_size=[0.1, 0.1, 0.1],
point_cloud_range=[-50, -50, -4, 50, 50, 2],
max_num_points=1,
max_voxels=1024)
voxel_based_points_filter = VoxelBasedPointSampler(
cur_sweep_cfg, prev_sweep_cfg, time_dim=3)
points = np.stack([
np.random.rand(4096) * 120 - 60,
np.random.rand(4096) * 120 - 60,
np.random.rand(4096) * 10 - 6
],
axis=-1)
input_time = np.concatenate([np.zeros([2048, 1]), np.ones([2048, 1])], 0)
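    # the first 2048 points are tagged t=0 (current sweep) and the rest t=1
    # (previous sweep); time_dim=3 tells the sampler which column holds it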
input_points = np.concatenate([points, input_time], 1)
input_points = LiDARPoints(input_points, points_dim=4)
input_dict = dict(
points=input_points, pts_mask_fields=[], pts_seg_fields=[])
input_dict = voxel_based_points_filter(input_dict)
points = input_dict['points']
repr_str = repr(voxel_based_points_filter)
expected_repr_str = """VoxelBasedPointSampler(
num_cur_sweep=1024,
num_prev_sweep=1024,
time_dim=3,
cur_voxel_generator=
VoxelGenerator(voxel_size=[0.1 0.1 0.1],
point_cloud_range=[-50.0, -50.0, -4.0, 50.0, 50.0, 2.0],
max_num_points=1,
max_voxels=1024,
grid_size=[1000, 1000, 60]),
prev_voxel_generator=
VoxelGenerator(voxel_size=[0.1 0.1 0.1],
point_cloud_range=[-50.0, -50.0, -4.0, 50.0, 50.0, 2.0],
max_num_points=1,
max_voxels=1024,
grid_size=[1000, 1000, 60]))"""
assert repr_str == expected_repr_str
assert points.shape == (2048, 4)
assert (points.tensor[:, :3].min(0)[0].numpy() <
cur_sweep_cfg['point_cloud_range'][0:3]).sum() == 0
assert (points.tensor[:, :3].max(0)[0].numpy() >
cur_sweep_cfg['point_cloud_range'][3:6]).sum() == 0
# Test instance mask and semantic mask
input_dict = dict(points=input_points)
input_dict['pts_instance_mask'] = np.random.randint(0, 10, [4096])
input_dict['pts_semantic_mask'] = np.random.randint(0, 6, [4096])
input_dict['pts_mask_fields'] = ['pts_instance_mask']
input_dict['pts_seg_fields'] = ['pts_semantic_mask']
input_dict = voxel_based_points_filter(input_dict)
pts_instance_mask = input_dict['pts_instance_mask']
pts_semantic_mask = input_dict['pts_semantic_mask']
assert pts_instance_mask.shape == (2048, )
assert pts_semantic_mask.shape == (2048, )
assert pts_instance_mask.max() < 10
assert pts_instance_mask.min() >= 0
assert pts_semantic_mask.max() < 6
assert pts_semantic_mask.min() >= 0
def test_points_sample():
np.random.seed(0)
points = np.fromfile(
'./tests/data/kitti/training/velodyne_reduced/000000.bin',
np.float32).reshape(-1, 4)
annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')
info = annos[0]
rect = torch.tensor(info['calib']['R0_rect'].astype(np.float32))
Trv2c = torch.tensor(info['calib']['Tr_velo_to_cam'].astype(np.float32))
points = LiDARPoints(
points.copy(), points_dim=4).convert_to(Coord3DMode.CAM, rect @ Trv2c)
num_points = 20
sample_range = 40
input_dict = dict(points=points.clone())
point_sample = PointSample(
num_points=num_points, sample_range=sample_range)
sampled_pts = point_sample(input_dict)['points']
select_idx = np.array([
622, 146, 231, 444, 504, 533, 80, 401, 379, 2, 707, 562, 176, 491, 496,
464, 15, 590, 194, 449
])
expected_pts = points.tensor.numpy()[select_idx]
assert np.allclose(sampled_pts.tensor.numpy(), expected_pts)
repr_str = repr(point_sample)
expected_repr_str = f'PointSample(num_points={num_points}, ' \
f'sample_range={sample_range}, ' \
'replace=False)'
assert repr_str == expected_repr_str
    # test when the number of far points is larger than the number of
    # sampled points
np.random.seed(0)
point_sample = PointSample(num_points=2, sample_range=sample_range)
input_dict = dict(points=points.clone())
sampled_pts = point_sample(input_dict)['points']
select_idx = np.array([449, 444])
expected_pts = points.tensor.numpy()[select_idx]
assert np.allclose(sampled_pts.tensor.numpy(), expected_pts)
def test_affine_resize():
def create_random_bboxes(num_bboxes, img_w, img_h):
bboxes_left_top = np.random.uniform(0, 0.5, size=(num_bboxes, 2))
bboxes_right_bottom = np.random.uniform(0.5, 1, size=(num_bboxes, 2))
bboxes = np.concatenate((bboxes_left_top, bboxes_right_bottom), 1)
bboxes = (bboxes * np.array([img_w, img_h, img_w, img_h])).astype(
np.float32)
return bboxes
    affine_resize = AffineResize(img_scale=(1290, 384), down_ratio=4)
    # test the case where RandomShiftScale is not applied before AffineResize
results = dict()
img = mmcv.imread('./tests/data/kitti/training/image_2/000000.png',
'color')
results['img'] = img
results['bbox_fields'] = ['gt_bboxes']
results['bbox3d_fields'] = ['gt_bboxes_3d']
h, w, _ = img.shape
gt_bboxes = create_random_bboxes(8, w, h)
gt_bboxes_3d = CameraInstance3DBoxes(torch.randn((8, 7)))
results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)
results['gt_labels3d'] = results['gt_labels']
results['gt_bboxes'] = gt_bboxes
results['gt_bboxes_3d'] = gt_bboxes_3d
results['depths'] = np.random.randn(gt_bboxes.shape[0])
centers2d_x = (gt_bboxes[:, [0]] + gt_bboxes[:, [2]]) / 2
centers2d_y = (gt_bboxes[:, [1]] + gt_bboxes[:, [3]]) / 2
centers2d = np.concatenate((centers2d_x, centers2d_y), axis=1)
results['centers2d'] = centers2d
    results = affine_resize(results)
assert results['gt_labels'].shape[0] == results['centers2d'].shape[0]
assert results['gt_labels3d'].shape[0] == results['centers2d'].shape[0]
assert results['gt_bboxes'].shape[0] == results['centers2d'].shape[0]
assert results['gt_bboxes_3d'].tensor.shape[0] == \
results['centers2d'].shape[0]
assert results['affine_aug'] is False
    # test the case where RandomShiftScale is applied before AffineResize
    # (center/size/affine_aug are provided)
results = dict()
img = mmcv.imread('./tests/data/kitti/training/image_2/000000.png',
'color')
results['img'] = img
results['bbox_fields'] = ['gt_bboxes']
results['bbox3d_fields'] = ['gt_bboxes_3d']
h, w, _ = img.shape
center = np.array([w / 2, h / 2], dtype=np.float32)
size = np.array([w, h], dtype=np.float32)
results['center'] = center
results['size'] = size
results['affine_aug'] = False
gt_bboxes = create_random_bboxes(8, w, h)
gt_bboxes_3d = CameraInstance3DBoxes(torch.randn((8, 7)))
results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)
results['gt_labels3d'] = results['gt_labels']
results['gt_bboxes'] = gt_bboxes
results['gt_bboxes_3d'] = gt_bboxes_3d
results['depths'] = np.random.randn(gt_bboxes.shape[0])
centers2d_x = (gt_bboxes[:, [0]] + gt_bboxes[:, [2]]) / 2
centers2d_y = (gt_bboxes[:, [1]] + gt_bboxes[:, [3]]) / 2
centers2d = np.concatenate((centers2d_x, centers2d_y), axis=1)
results['centers2d'] = centers2d
    results = affine_resize(results)
assert results['gt_labels'].shape[0] == results['centers2d'].shape[0]
assert results['gt_labels3d'].shape[0] == results['centers2d'].shape[0]
assert results['gt_bboxes'].shape[0] == results['centers2d'].shape[0]
assert results['gt_bboxes_3d'].tensor.shape[0] == results[
'centers2d'].shape[0]
assert 'center' in results
assert 'size' in results
assert 'affine_aug' in results
def test_random_shift_scale():
random_shift_scale = RandomShiftScale(shift_scale=(0.2, 0.4), aug_prob=0.3)
results = dict()
img = mmcv.imread('./tests/data/kitti/training/image_2/000000.png',
'color')
results['img'] = img
results = random_shift_scale(results)
assert results['center'].dtype == np.float32
assert results['size'].dtype == np.float32
assert 'affine_aug' in results
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose
def test_scannet_pipeline():
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
'window', 'bookshelf', 'picture', 'counter', 'desk',
'curtain', 'refrigerator', 'showercurtrain', 'toilet',
'sink', 'bathtub', 'garbagebin')
np.random.seed(0)
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
34, 36, 39)),
dict(type='PointSample', num_points=5),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=1.0,
flip_ratio_bev_vertical=1.0),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
]),
]
pipeline = Compose(pipelines)
info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
results = dict()
data_path = './tests/data/scannet'
results['pts_filename'] = osp.join(data_path, info['pts_path'])
if info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
np.float32)
scannet_gt_labels_3d = info['annos']['class'].astype(np.int64)
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels_3d = np.zeros((1, ), dtype=np.int64)
results['ann_info'] = dict()
results['ann_info']['pts_instance_mask_path'] = osp.join(
data_path, info['pts_instance_mask_path'])
results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, info['pts_semantic_mask_path'])
results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
results['ann_info']['axis_align_matrix'] = \
info['annos']['axis_align_matrix']
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
results = pipeline(results)
points = results['points']._data
gt_bboxes_3d = results['gt_bboxes_3d']._data
gt_labels_3d = results['gt_labels_3d']._data
pts_semantic_mask = results['pts_semantic_mask']._data
pts_instance_mask = results['pts_instance_mask']._data
expected_points = torch.tensor(
[[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00],
[3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00],
[4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03],
[6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
[4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
expected_gt_bboxes_3d = torch.tensor(
[[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
[-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
[-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
[-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
[3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
expected_gt_labels_3d = np.array([
6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
0, 0, 0, 5, 5, 5
])
expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
assert torch.allclose(points, expected_points, 1e-2)
assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
1e-2)
assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
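

# The pipelines above are plain lists of config dicts whose `type` key names a
# registered transform; `Compose` resolves each dict into a callable and
# chains the callables over one shared results dict. A minimal sketch of that
# chaining only (the real Compose in mmdet3d.datasets.pipelines also handles
# the registry lookup):
class _MiniCompose:

    def __init__(self, transforms):
        # here `transforms` are already-built callables, not config dicts
        self.transforms = transforms

    def __call__(self, results):
        for transform in self.transforms:
            results = transform(results)
            if results is None:  # a transform may reject the sample
                return None
        return results
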
def test_scannet_seg_pipeline():
class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
'curtain', 'refrigerator', 'showercurtrain', 'toilet',
'sink', 'bathtub', 'otherfurniture')
np.random.seed(0)
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
28, 33, 34, 36, 39),
max_cat_id=40),
dict(
type='IndoorPatchPointSample',
num_points=5,
block_size=1.5,
ignore_index=len(class_names),
use_normalized_coord=True,
enlarge_size=0.2,
min_unique_num=None),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
pipeline = Compose(pipelines)
info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
results = dict()
data_path = './tests/data/scannet'
results['pts_filename'] = osp.join(data_path, info['pts_path'])
results['ann_info'] = dict()
results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, info['pts_semantic_mask_path'])
results['pts_seg_fields'] = []
results = pipeline(results)
points = results['points']._data
pts_semantic_mask = results['pts_semantic_mask']._data
# build sampled points
scannet_points = np.fromfile(
osp.join(data_path, info['pts_path']), dtype=np.float32).reshape(
(-1, 6))
scannet_choices = np.array([87, 34, 58, 9, 18])
scannet_center = np.array([-2.1772466, -3.4789145, 1.242711])
scannet_center[2] = 0.0
scannet_coord_max = np.amax(scannet_points[:, :3], axis=0)
expected_points = np.concatenate([
scannet_points[scannet_choices, :3] - scannet_center,
scannet_points[scannet_choices, 3:] / 255.,
scannet_points[scannet_choices, :3] / scannet_coord_max
],
axis=1)
expected_pts_semantic_mask = np.array([13, 13, 12, 2, 0])
assert np.allclose(points.numpy(), expected_points, atol=1e-6)
assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
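

# The expected points above are rebuilt by hand: with
# use_normalized_coord=True, IndoorPatchPointSample emits 9-dim features per
# sampled point -- XYZ shifted by the patch center (whose z is zeroed), RGB
# scaled to [0, 1], and XYZ divided by the per-scene coordinate maximum. A
# standalone sketch of that construction (assumed semantics, mirroring the
# manual check above):
def _build_patch_features(points_xyzrgb, choices, center):
    xyz = points_xyzrgb[choices, :3]
    rgb = points_xyzrgb[choices, 3:6]
    coord_max = np.amax(points_xyzrgb[:, :3], axis=0)
    return np.concatenate([xyz - center, rgb / 255., xyz / coord_max], axis=1)
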
def test_s3dis_seg_pipeline():
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window',
'door', 'table', 'chair', 'sofa', 'bookcase', 'board',
'clutter')
np.random.seed(0)
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='IndoorPatchPointSample',
num_points=5,
block_size=1.0,
ignore_index=len(class_names),
use_normalized_coord=True,
enlarge_size=0.2,
min_unique_num=None),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
pipeline = Compose(pipelines)
info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0]
results = dict()
data_path = './tests/data/s3dis'
results['pts_filename'] = osp.join(data_path, info['pts_path'])
results['ann_info'] = dict()
results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, info['pts_semantic_mask_path'])
results['pts_seg_fields'] = []
results = pipeline(results)
points = results['points']._data
pts_semantic_mask = results['pts_semantic_mask']._data
# build sampled points
s3dis_points = np.fromfile(
osp.join(data_path, info['pts_path']), dtype=np.float32).reshape(
(-1, 6))
s3dis_choices = np.array([87, 37, 60, 18, 31])
s3dis_center = np.array([2.691, 2.231, 3.172])
s3dis_center[2] = 0.0
s3dis_coord_max = np.amax(s3dis_points[:, :3], axis=0)
expected_points = np.concatenate([
s3dis_points[s3dis_choices, :3] - s3dis_center,
s3dis_points[s3dis_choices, 3:] / 255.,
s3dis_points[s3dis_choices, :3] / s3dis_coord_max
],
axis=1)
expected_pts_semantic_mask = np.array([0, 1, 0, 8, 0])
assert np.allclose(points.numpy(), expected_points, atol=1e-6)
assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
def test_sunrgbd_pipeline():
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
'dresser', 'night_stand', 'bookshelf', 'bathtub')
np.random.seed(0)
pipelines = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=1.0,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='PointSample', num_points=5),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
pipeline = Compose(pipelines)
results = dict()
info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
data_path = './tests/data/sunrgbd'
results['pts_filename'] = osp.join(data_path, info['pts_path'])
if info['annos']['gt_num'] != 0:
gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(
np.float32)
gt_labels_3d = info['annos']['class'].astype(np.int64)
else:
gt_bboxes_3d = np.zeros((1, 7), dtype=np.float32)
gt_labels_3d = np.zeros((1, ), dtype=np.int64)
# prepare input of pipeline
results['ann_info'] = dict()
results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
results['ann_info']['gt_labels_3d'] = gt_labels_3d
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
results = pipeline(results)
points = results['points']._data
gt_bboxes_3d = results['gt_bboxes_3d']._data
gt_labels_3d = results['gt_labels_3d']._data
expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
[0.8707, 1.3635, 0.0437, 0.0238],
[0.8636, 1.3511, 0.0504, 0.0304],
[0.8690, 1.3461, 0.1265, 0.1065],
[0.8668, 1.3434, 0.1216, 0.1017]])
    # Depth coordinate system update: only the yaw changes, since rotation in
    # depth coordinates is counter-clockwise while the yaw angle was
    # originally clockwise. The heading angles in the SUN RGB-D data also
    # reverse the sign, and after the horizontal flip the sign reverses again.
rotation_angle = info['annos']['rotation_y']
expected_gt_bboxes_3d = torch.tensor(
[[
-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245,
1.3989 + 0.047001579467984445 * 2 - 2 * rotation_angle[0]
],
[
-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510,
1.4446 + 0.047001579467984445 * 2 - 2 * rotation_angle[1]
],
[
0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563,
2.9924 + 0.047001579467984445 * 2 - 2 * rotation_angle[2]
]]).float()
expected_gt_labels_3d = np.array([0, 7, 6])
assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
assert torch.allclose(points, expected_points, 1e-2)
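

# A tiny helper makes the yaw arithmetic above explicit. This is a sketch of
# the bookkeeping used by the manual check only, not of the box-flip
# implementation itself; `rot` stands for the constant 0.047001579467984445,
# presumably the global rotation sampled under this seed:
def _expected_depth_yaw(base_yaw, rot, annotated_rotation_y):
    # the annotation's sign is reversed once by the convention change and once
    # more by the horizontal flip, hence the factor of two on both terms
    return base_yaw + 2 * rot - 2 * annotated_rotation_y
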

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmdet3d.core.points import DepthPoints
from mmdet3d.datasets.pipelines import (IndoorPatchPointSample, PointSample,
PointSegClassMapping)
def test_indoor_sample():
np.random.seed(0)
scannet_sample_points = PointSample(5)
scannet_results = dict()
scannet_points = np.array([[1.0719866, -0.7870435, 0.8408122, 0.9196809],
[1.103661, 0.81065744, 2.6616862, 2.7405548],
[1.0276475, 1.5061463, 2.6174362, 2.6963048],
[-0.9709588, 0.6750515, 0.93901765, 1.0178864],
[1.0578915, 1.1693821, 0.87503505, 0.95390373],
[0.05560996, -1.5688863, 1.2440368, 1.3229055],
[-0.15731563, -1.7735453, 2.7535574, 2.832426],
[1.1188195, -0.99211365, 2.5551798, 2.6340485],
[-0.9186557, -1.7041215, 2.0562649, 2.1351335],
[-1.0128691, -1.3394243, 0.040936, 0.1198047]])
scannet_results['points'] = DepthPoints(
scannet_points, points_dim=4, attribute_dims=dict(height=3))
scannet_pts_instance_mask = np.array(
[15, 12, 11, 38, 0, 18, 17, 12, 17, 0])
scannet_results['pts_instance_mask'] = scannet_pts_instance_mask
scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])
scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
scannet_results = scannet_sample_points(scannet_results)
scannet_points_result = scannet_results['points'].tensor.numpy()
scannet_instance_labels_result = scannet_results['pts_instance_mask']
scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
scannet_choices = np.array([2, 8, 4, 9, 1])
assert np.allclose(scannet_points[scannet_choices], scannet_points_result)
assert np.all(scannet_pts_instance_mask[scannet_choices] ==
scannet_instance_labels_result)
assert np.all(scannet_pts_semantic_mask[scannet_choices] ==
scannet_semantic_labels_result)
np.random.seed(0)
sunrgbd_sample_points = PointSample(5)
sunrgbd_results = dict()
sunrgbd_point_cloud = np.array(
[[-1.8135729e-01, 1.4695230e+00, -1.2780589e+00, 7.8938007e-03],
[1.2581362e-03, 2.0561588e+00, -1.0341064e+00, 2.5184631e-01],
[6.8236995e-01, 3.3611867e+00, -9.2599887e-01, 3.5995382e-01],
[-2.9432583e-01, 1.8714852e+00, -9.0929651e-01, 3.7665617e-01],
[-0.5024875, 1.8032674, -1.1403012, 0.14565146],
[-0.520559, 1.6324949, -0.9896099, 0.2963428],
[0.95929825, 2.9402404, -0.8746674, 0.41128528],
[-0.74624217, 1.5244724, -0.8678476, 0.41810507],
[0.56485355, 1.5747732, -0.804522, 0.4814307],
[-0.0913099, 1.3673826, -1.2800645, 0.00588822]])
sunrgbd_results['points'] = DepthPoints(
sunrgbd_point_cloud, points_dim=4, attribute_dims=dict(height=3))
sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)
sunrgbd_choices = np.array([2, 8, 4, 9, 1])
sunrgbd_points_result = sunrgbd_results['points'].tensor.numpy()
repr_str = repr(sunrgbd_sample_points)
expected_repr_str = 'PointSample(num_points=5, ' \
'sample_range=None, ' \
'replace=False)'
assert repr_str == expected_repr_str
assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],
sunrgbd_points_result)
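

# PointSample draws num_points indices without replacement and indexes the
# points and every per-point mask with the same choice array, which is why the
# sampled instance/semantic labels above stay aligned with the sampled points.
# A minimal sketch under those assumptions:
def _sample_points(points, masks, num_points):
    choices = np.random.choice(len(points), num_points, replace=False)
    return points[choices], [mask[choices] for mask in masks]
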
def test_indoor_seg_sample():
# test the train time behavior of IndoorPatchPointSample
np.random.seed(0)
scannet_patch_sample_points = IndoorPatchPointSample(
5, 1.5, ignore_index=20, use_normalized_coord=True)
scannet_seg_class_mapping = \
PointSegClassMapping((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
24, 28, 33, 34, 36, 39), 40)
scannet_results = dict()
scannet_points = np.fromfile(
'./tests/data/scannet/points/scene0000_00.bin',
dtype=np.float32).reshape((-1, 6))
scannet_results['points'] = DepthPoints(
scannet_points, points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
scannet_pts_semantic_mask = np.fromfile(
'./tests/data/scannet/semantic_mask/scene0000_00.bin', dtype=np.int64)
scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
scannet_results = scannet_seg_class_mapping(scannet_results)
scannet_results = scannet_patch_sample_points(scannet_results)
scannet_points_result = scannet_results['points']
scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
# manually constructed sampled points
scannet_choices = np.array([87, 34, 58, 9, 18])
scannet_center = np.array([-2.1772466, -3.4789145, 1.242711])
scannet_center[2] = 0.0
scannet_coord_max = np.amax(scannet_points[:, :3], axis=0)
scannet_input_points = np.concatenate([
scannet_points[scannet_choices, :3] - scannet_center,
scannet_points[scannet_choices, 3:],
scannet_points[scannet_choices, :3] / scannet_coord_max
], 1)
assert scannet_points_result.points_dim == 9
assert scannet_points_result.attribute_dims == dict(
color=[3, 4, 5], normalized_coord=[6, 7, 8])
scannet_points_result = scannet_points_result.tensor.numpy()
assert np.allclose(scannet_input_points, scannet_points_result, atol=1e-6)
assert np.all(
np.array([13, 13, 12, 2, 0]) == scannet_semantic_labels_result)
repr_str = repr(scannet_patch_sample_points)
expected_repr_str = 'IndoorPatchPointSample(num_points=5, ' \
'block_size=1.5, ' \
'ignore_index=20, ' \
'use_normalized_coord=True, ' \
'num_try=10, ' \
'enlarge_size=0.2, ' \
'min_unique_num=None, ' \
'eps=0.01)'
assert repr_str == expected_repr_str
# when enlarge_size and min_unique_num are set
np.random.seed(0)
scannet_patch_sample_points = IndoorPatchPointSample(
5,
1.0,
ignore_index=20,
use_normalized_coord=False,
num_try=1000,
enlarge_size=None,
min_unique_num=5)
# this patch is within [0, 1] and has 5 unique points
# it should be selected
scannet_points = np.random.rand(5, 6)
scannet_points[0, :3] = np.array([0.5, 0.5, 0.5])
# generate points smaller than `min_unique_num` in local patches
# they won't be sampled
for i in range(2, 11, 2):
scannet_points = np.concatenate(
[scannet_points, np.random.rand(4, 6) + i], axis=0)
scannet_results = dict(
points=DepthPoints(
scannet_points, points_dim=6,
attribute_dims=dict(color=[3, 4, 5])),
pts_semantic_mask=np.random.randint(0, 20,
(scannet_points.shape[0], )))
scannet_results = scannet_patch_sample_points(scannet_results)
scannet_points_result = scannet_results['points']
# manually constructed sampled points
scannet_choices = np.array([2, 4, 3, 1, 0])
scannet_center = np.array([0.56804454, 0.92559665, 0.07103606])
scannet_center[2] = 0.0
scannet_input_points = np.concatenate([
scannet_points[scannet_choices, :3] - scannet_center,
scannet_points[scannet_choices, 3:],
], 1)
assert scannet_points_result.points_dim == 6
assert scannet_points_result.attribute_dims == dict(color=[3, 4, 5])
scannet_points_result = scannet_points_result.tensor.numpy()
assert np.allclose(scannet_input_points, scannet_points_result, atol=1e-6)
# test on S3DIS dataset
np.random.seed(0)
s3dis_patch_sample_points = IndoorPatchPointSample(
5, 1.0, ignore_index=None, use_normalized_coord=True)
s3dis_results = dict()
s3dis_points = np.fromfile(
'./tests/data/s3dis/points/Area_1_office_2.bin',
dtype=np.float32).reshape((-1, 6))
s3dis_results['points'] = DepthPoints(
s3dis_points, points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
s3dis_pts_semantic_mask = np.fromfile(
'./tests/data/s3dis/semantic_mask/Area_1_office_2.bin', dtype=np.int64)
s3dis_results['pts_semantic_mask'] = s3dis_pts_semantic_mask
s3dis_results = s3dis_patch_sample_points(s3dis_results)
s3dis_points_result = s3dis_results['points']
s3dis_semantic_labels_result = s3dis_results['pts_semantic_mask']
# manually constructed sampled points
s3dis_choices = np.array([87, 37, 60, 18, 31])
s3dis_center = np.array([2.691, 2.231, 3.172])
s3dis_center[2] = 0.0
s3dis_coord_max = np.amax(s3dis_points[:, :3], axis=0)
s3dis_input_points = np.concatenate([
s3dis_points[s3dis_choices, :3] - s3dis_center,
s3dis_points[s3dis_choices,
3:], s3dis_points[s3dis_choices, :3] / s3dis_coord_max
], 1)
assert s3dis_points_result.points_dim == 9
assert s3dis_points_result.attribute_dims == dict(
color=[3, 4, 5], normalized_coord=[6, 7, 8])
s3dis_points_result = s3dis_points_result.tensor.numpy()
assert np.allclose(s3dis_input_points, s3dis_points_result, atol=1e-6)
assert np.all(np.array([0, 1, 0, 8, 0]) == s3dis_semantic_labels_result)
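

# IndoorPatchPointSample keeps re-drawing candidate patches (up to num_try
# times) and, when min_unique_num is set, rejects patches holding fewer unique
# points -- which is why the small 4-point clusters above are never selected.
# Sketch of that acceptance test (assumed semantics):
def _patch_is_valid(num_unique_points, min_unique_num):
    return min_unique_num is None or num_unique_points >= min_unique_num
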

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.parallel import DataContainer
from mmdet3d.datasets.pipelines import (DefaultFormatBundle,
LoadMultiViewImageFromFiles)
def test_load_multi_view_image_from_files():
multi_view_img_loader = LoadMultiViewImageFromFiles(to_float32=True)
num_views = 6
filename = 'tests/data/waymo/kitti_format/training/image_0/0000000.png'
filenames = [filename for _ in range(num_views)]
input_dict = dict(img_filename=filenames)
results = multi_view_img_loader(input_dict)
img = results['img']
img0 = img[0]
img_norm_cfg = results['img_norm_cfg']
assert isinstance(img, list)
assert len(img) == num_views
assert img0.dtype == np.float32
assert results['filename'] == filenames
assert results['img_shape'] == results['ori_shape'] == \
results['pad_shape'] == (1280, 1920, 3, num_views)
assert results['scale_factor'] == 1.0
assert np.all(img_norm_cfg['mean'] == np.zeros(3, dtype=np.float32))
assert np.all(img_norm_cfg['std'] == np.ones(3, dtype=np.float32))
assert not img_norm_cfg['to_rgb']
repr_str = repr(multi_view_img_loader)
expected_str = 'LoadMultiViewImageFromFiles(to_float32=True, ' \
"color_type='unchanged')"
assert repr_str == expected_str
# test LoadMultiViewImageFromFiles's compatibility with DefaultFormatBundle
# refer to https://github.com/open-mmlab/mmdetection3d/issues/227
default_format_bundle = DefaultFormatBundle()
results = default_format_bundle(results)
img = results['img']
assert isinstance(img, DataContainer)
assert img._data.shape == torch.Size((num_views, 3, 1280, 1920))
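

# DefaultFormatBundle turns the list of (H, W, C) float arrays into one
# (num_views, C, H, W) tensor inside a DataContainer. The equivalent stacking
# in plain numpy/torch (a sketch, not the bundle's actual code path):
def _stack_multi_view(imgs):
    stacked = np.stack(imgs, axis=0).transpose(0, 3, 1, 2)  # to (N, C, H, W)
    return torch.from_numpy(np.ascontiguousarray(stacked))
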

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmdet3d.core.points import LiDARPoints
from mmdet3d.datasets.pipelines.loading import LoadPointsFromMultiSweeps
def test_load_points_from_multi_sweeps():
np.random.seed(0)
file_client_args = dict(backend='disk')
load_points_from_multi_sweeps_1 = LoadPointsFromMultiSweeps(
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args)
load_points_from_multi_sweeps_2 = LoadPointsFromMultiSweeps(
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True)
load_points_from_multi_sweeps_3 = LoadPointsFromMultiSweeps(
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True,
test_mode=True)
points = np.random.random([100, 5]) * 2
points = LiDARPoints(points, points_dim=5)
input_results = dict(points=points, sweeps=[], timestamp=None)
results = load_points_from_multi_sweeps_1(input_results)
assert results['points'].tensor.numpy().shape == (100, 5)
input_results = dict(points=points, sweeps=[], timestamp=None)
results = load_points_from_multi_sweeps_2(input_results)
assert results['points'].tensor.numpy().shape == (775, 5)
sensor2lidar_rotation = np.array(
[[9.99999967e-01, 1.13183067e-05, 2.56845368e-04],
[-1.12839618e-05, 9.99999991e-01, -1.33719456e-04],
[-2.56846879e-04, 1.33716553e-04, 9.99999958e-01]])
sensor2lidar_translation = np.array([-0.0009198, -0.03964854, -0.00190136])
sweep = dict(
data_path='tests/data/nuscenes/sweeps/LIDAR_TOP/'
'n008-2018-09-18-12-07-26-0400__LIDAR_TOP__'
'1537287083900561.pcd.bin',
sensor2lidar_rotation=sensor2lidar_rotation,
sensor2lidar_translation=sensor2lidar_translation,
timestamp=0)
input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)
results = load_points_from_multi_sweeps_1(input_results)
assert results['points'].tensor.numpy().shape == (500, 5)
input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)
results = load_points_from_multi_sweeps_2(input_results)
assert results['points'].tensor.numpy().shape == (451, 5)
input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)
results = load_points_from_multi_sweeps_2(input_results)
assert results['points'].tensor.numpy().shape == (3259, 5)
input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)
results = load_points_from_multi_sweeps_3(input_results)
assert results['points'].tensor.numpy().shape == (3259, 5)
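

# Each sweep above is merged by mapping its points into the reference LiDAR
# frame and stamping the time lag into the 5th channel; remove_close then
# drops returns right next to the sensor. A per-sweep sketch under those
# assumptions (the exact close-point test in the real transform may differ):
def _merge_sweep(sweep_points, rotation, translation, time_lag, radius=1.0):
    pts = sweep_points.copy()
    pts[:, :3] = pts[:, :3] @ rotation.T + translation
    pts[:, 4] = time_lag
    keep = np.abs(pts[:, :2]).max(axis=1) > radius  # outside a small box
    return pts[keep]
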

# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import pytest
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.core.points import DepthPoints, LiDARPoints
# yapf: disable
from mmdet3d.datasets.pipelines import (LoadAnnotations3D,
LoadImageFromFileMono3D,
LoadPointsFromFile,
LoadPointsFromMultiSweeps,
NormalizePointsColor,
PointSegClassMapping)
# yapf: enable
def test_load_points_from_indoor_file():
# test on SUN RGB-D dataset with shifted height
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
sunrgbd_load_points_from_file = LoadPointsFromFile(
coord_type='DEPTH', load_dim=6, shift_height=True)
sunrgbd_results = dict()
data_path = './tests/data/sunrgbd'
sunrgbd_info = sunrgbd_info[0]
sunrgbd_results['pts_filename'] = osp.join(data_path,
sunrgbd_info['pts_path'])
sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
sunrgbd_point_cloud = sunrgbd_results['points'].tensor.numpy()
assert sunrgbd_point_cloud.shape == (100, 4)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
scannet_load_data = LoadPointsFromFile(
coord_type='DEPTH', shift_height=True)
scannet_results = dict()
data_path = './tests/data/scannet'
scannet_info = scannet_info[0]
# test on ScanNet dataset with shifted height
scannet_results['pts_filename'] = osp.join(data_path,
scannet_info['pts_path'])
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points'].tensor.numpy()
repr_str = repr(scannet_load_data)
expected_repr_str = 'LoadPointsFromFile(shift_height=True, ' \
'use_color=False, ' \
'file_client_args={\'backend\': \'disk\'}, ' \
'load_dim=6, use_dim=[0, 1, 2])'
assert repr_str == expected_repr_str
assert scannet_point_cloud.shape == (100, 4)
# test load point cloud with both shifted height and color
scannet_load_data = LoadPointsFromFile(
coord_type='DEPTH',
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5],
shift_height=True,
use_color=True)
scannet_results = dict()
scannet_results['pts_filename'] = osp.join(data_path,
scannet_info['pts_path'])
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.points_dim == 7
assert scannet_point_cloud.attribute_dims == dict(
height=3, color=[4, 5, 6])
scannet_point_cloud = scannet_point_cloud.tensor.numpy()
assert scannet_point_cloud.shape == (100, 7)
# test load point cloud on S3DIS with color
data_path = './tests/data/s3dis'
s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')
s3dis_info = s3dis_info[0]
s3dis_load_data = LoadPointsFromFile(
coord_type='DEPTH',
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5],
shift_height=False,
use_color=True)
s3dis_results = dict()
s3dis_results['pts_filename'] = osp.join(data_path, s3dis_info['pts_path'])
s3dis_results = s3dis_load_data(s3dis_results)
s3dis_point_cloud = s3dis_results['points']
assert s3dis_point_cloud.points_dim == 6
assert s3dis_point_cloud.attribute_dims == dict(color=[3, 4, 5])
s3dis_point_cloud = s3dis_point_cloud.tensor.numpy()
assert s3dis_point_cloud.shape == (100, 6)
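

# shift_height appends a "height above floor" channel: the floor level is
# estimated as a low percentile of z and subtracted, which is where the extra
# 4th column asserted above comes from. A sketch of that step, assuming the
# percentile-based floor estimate:
def _append_shifted_height(points_xyz):
    floor_z = np.percentile(points_xyz[:, 2], 0.99)  # near-lowest z ~ floor
    return np.concatenate([points_xyz, points_xyz[:, 2:3] - floor_z], axis=1)
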
def test_load_points_from_outdoor_file():
data_path = 'tests/data/kitti/a.bin'
load_points_from_file = LoadPointsFromFile(
coord_type='LIDAR', load_dim=4, use_dim=4)
results = dict()
results['pts_filename'] = data_path
results = load_points_from_file(results)
points = results['points'].tensor.numpy()
assert points.shape == (50, 4)
assert np.allclose(points.sum(), 2637.479)
load_points_from_file = LoadPointsFromFile(
coord_type='LIDAR', load_dim=4, use_dim=[0, 1, 2, 3])
results = dict()
results['pts_filename'] = data_path
results = load_points_from_file(results)
new_points = results['points'].tensor.numpy()
assert new_points.shape == (50, 4)
    assert np.allclose(new_points.sum(), 2637.479)
    assert np.all(np.equal(points, new_points))
with pytest.raises(AssertionError):
LoadPointsFromFile(coord_type='LIDAR', load_dim=4, use_dim=5)
def test_load_annotations3D():
# Test scannet LoadAnnotations3D
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = LoadAnnotations3D(
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True)
scannet_results = dict()
data_path = './tests/data/scannet'
if scannet_info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
scannet_gt_labels_3d = scannet_info['annos']['class']
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels_3d = np.zeros((1, ))
# prepare input of loading pipeline
scannet_results['ann_info'] = dict()
scannet_results['ann_info']['pts_instance_mask_path'] = osp.join(
data_path, scannet_info['pts_instance_mask_path'])
scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, scannet_info['pts_semantic_mask_path'])
scannet_results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
scannet_results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
scannet_results['bbox3d_fields'] = []
scannet_results['pts_mask_fields'] = []
scannet_results['pts_seg_fields'] = []
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_gt_boxes = scannet_results['gt_bboxes_3d']
scannet_gt_labels = scannet_results['gt_labels_3d']
scannet_pts_instance_mask = scannet_results['pts_instance_mask']
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
repr_str = repr(scannet_load_annotations3D)
expected_repr_str = 'LoadAnnotations3D(\n with_bbox_3d=True, ' \
'with_label_3d=True, with_attr_label=False, ' \
'with_mask_3d=True, with_seg_3d=True, ' \
'with_bbox=False, with_label=False, ' \
'with_mask=False, with_seg=False, ' \
'with_bbox_depth=False, poly2mask=True)'
assert repr_str == expected_repr_str
assert scannet_gt_boxes.tensor.shape == (27, 7)
assert scannet_gt_labels.shape == (27, )
assert scannet_pts_instance_mask.shape == (100, )
assert scannet_pts_semantic_mask.shape == (100, )
# Test s3dis LoadAnnotations3D
s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0]
s3dis_load_annotations3D = LoadAnnotations3D(
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=True,
with_seg_3d=True)
s3dis_results = dict()
data_path = './tests/data/s3dis'
# prepare input of loading pipeline
s3dis_results['ann_info'] = dict()
s3dis_results['ann_info']['pts_instance_mask_path'] = osp.join(
data_path, s3dis_info['pts_instance_mask_path'])
s3dis_results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, s3dis_info['pts_semantic_mask_path'])
s3dis_results['pts_mask_fields'] = []
s3dis_results['pts_seg_fields'] = []
s3dis_results = s3dis_load_annotations3D(s3dis_results)
s3dis_pts_instance_mask = s3dis_results['pts_instance_mask']
s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask']
assert s3dis_pts_instance_mask.shape == (100, )
assert s3dis_pts_semantic_mask.shape == (100, )
def test_load_segmentation_mask():
# Test loading semantic segmentation mask on ScanNet dataset
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = LoadAnnotations3D(
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True)
scannet_results = dict()
data_path = './tests/data/scannet'
# prepare input of loading pipeline
scannet_results['ann_info'] = dict()
scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, scannet_info['pts_semantic_mask_path'])
scannet_results['pts_seg_fields'] = []
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert scannet_pts_semantic_mask.shape == (100, )
# Convert class_id to label and assign ignore_index
scannet_seg_class_mapping = \
PointSegClassMapping((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
24, 28, 33, 34, 36, 39), 40)
scannet_results = scannet_seg_class_mapping(scannet_results)
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert np.all(scannet_pts_semantic_mask == np.array([
13, 20, 1, 2, 6, 2, 13, 1, 13, 2, 0, 20, 5, 20, 2, 0, 1, 13, 0, 0, 0,
20, 6, 20, 13, 20, 2, 20, 20, 2, 16, 5, 13, 5, 13, 0, 20, 0, 0, 1, 7,
20, 20, 20, 20, 20, 20, 20, 0, 1, 2, 13, 16, 1, 1, 1, 6, 2, 12, 20, 3,
20, 20, 14, 1, 20, 2, 1, 7, 2, 0, 5, 20, 5, 20, 20, 3, 6, 5, 20, 0, 13,
12, 2, 20, 0, 0, 13, 20, 1, 20, 5, 3, 0, 13, 1, 2, 2, 2, 1
]))
# Test on S3DIS dataset
s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0]
s3dis_load_annotations3D = LoadAnnotations3D(
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True)
s3dis_results = dict()
data_path = './tests/data/s3dis'
# prepare input of loading pipeline
s3dis_results['ann_info'] = dict()
s3dis_results['ann_info']['pts_semantic_mask_path'] = osp.join(
data_path, s3dis_info['pts_semantic_mask_path'])
s3dis_results['pts_seg_fields'] = []
s3dis_results = s3dis_load_annotations3D(s3dis_results)
s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask']
assert s3dis_pts_semantic_mask.shape == (100, )
# Convert class_id to label and assign ignore_index
s3dis_seg_class_mapping = PointSegClassMapping(tuple(range(13)), 13)
s3dis_results = s3dis_seg_class_mapping(s3dis_results)
s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask']
assert np.all(s3dis_pts_semantic_mask == np.array([
2, 2, 1, 2, 2, 5, 1, 0, 1, 1, 9, 12, 3, 0, 2, 0, 2, 0, 8, 2, 0, 2, 0,
2, 1, 7, 2, 10, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 2, 2, 0, 0, 4, 6, 7, 2,
1, 2, 0, 1, 7, 0, 2, 2, 2, 0, 2, 2, 1, 12, 0, 2, 2, 2, 2, 7, 2, 2, 0,
2, 6, 2, 12, 6, 2, 12, 2, 1, 6, 1, 2, 6, 8, 2, 10, 1, 10, 0, 6, 9, 4,
3, 0, 0, 12, 1, 1, 5, 2, 2
]))
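

# The class-id conversion above is a pure lookup: ids listed in valid_cat_ids
# map to their position in that tuple, and every other id maps to the ignore
# label len(valid_cat_ids). An equivalent numpy sketch of what
# PointSegClassMapping computes:
def _map_seg_labels(sem_mask, valid_cat_ids, max_cat_id):
    lut = np.full(max_cat_id + 1, len(valid_cat_ids), dtype=np.int64)
    for train_id, cat_id in enumerate(valid_cat_ids):
        lut[cat_id] = train_id
    return lut[sem_mask]
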
def test_load_points_from_multi_sweeps():
load_points_from_multi_sweeps = LoadPointsFromMultiSweeps()
sweep = dict(
data_path='./tests/data/nuscenes/sweeps/LIDAR_TOP/'
'n008-2018-09-18-12-07-26-0400__LIDAR_TOP__1537287083900561.pcd.bin',
timestamp=1537290014899034,
sensor2lidar_translation=[-0.02344713, -3.88266051, -0.17151584],
sensor2lidar_rotation=np.array(
[[9.99979347e-01, 3.99870769e-04, 6.41441690e-03],
[-4.42034222e-04, 9.99978299e-01, 6.57316197e-03],
[-6.41164929e-03, -6.57586161e-03, 9.99957824e-01]]))
points = LiDARPoints(
np.array([[1., 2., 3., 4., 5.], [1., 2., 3., 4., 5.],
[1., 2., 3., 4., 5.]]),
points_dim=5)
results = dict(points=points, timestamp=1537290014899034, sweeps=[sweep])
results = load_points_from_multi_sweeps(results)
points = results['points'].tensor.numpy()
repr_str = repr(load_points_from_multi_sweeps)
expected_repr_str = 'LoadPointsFromMultiSweeps(sweeps_num=10)'
assert repr_str == expected_repr_str
assert points.shape == (403, 4)
def test_load_image_from_file_mono_3d():
load_image_from_file_mono_3d = LoadImageFromFileMono3D()
filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/' \
'n015-2018-07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
cam_intrinsic = np.array([[1256.74, 0.0, 792.11], [0.0, 1256.74, 492.78],
[0.0, 0.0, 1.0]])
input_dict = dict(
img_prefix=None,
img_info=dict(filename=filename, cam_intrinsic=cam_intrinsic.copy()))
results = load_image_from_file_mono_3d(input_dict)
assert results['img'].shape == (900, 1600, 3)
assert np.all(results['cam2img'] == cam_intrinsic)
repr_str = repr(load_image_from_file_mono_3d)
expected_repr_str = 'LoadImageFromFileMono3D(to_float32=False, ' \
"color_type='color', channel_order='bgr', " \
"file_client_args={'backend': 'disk'})"
assert repr_str == expected_repr_str
def test_point_seg_class_mapping():
    # max_cat_id should be larger than the max id in valid_cat_ids
with pytest.raises(AssertionError):
point_seg_class_mapping = PointSegClassMapping([1, 2, 5], 4)
sem_mask = np.array([
16, 22, 2, 3, 7, 3, 16, 2, 16, 3, 1, 0, 6, 22, 3, 1, 2, 16, 1, 1, 1,
38, 7, 25, 16, 25, 3, 40, 38, 3, 33, 6, 16, 6, 16, 1, 38, 1, 1, 2, 8,
0, 18, 15, 0, 0, 40, 40, 1, 2, 3, 16, 33, 2, 2, 2, 7, 3, 14, 22, 4, 22,
15, 24, 2, 40, 3, 2, 8, 3, 1, 6, 40, 6, 0, 15, 4, 7, 6, 0, 1, 16, 14,
3, 0, 1, 1, 16, 38, 2, 15, 6, 4, 1, 16, 2, 3, 3, 3, 2
])
valid_cat_ids = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
34, 36, 39)
point_seg_class_mapping = PointSegClassMapping(valid_cat_ids, 40)
input_dict = dict(pts_semantic_mask=sem_mask)
results = point_seg_class_mapping(input_dict)
mapped_sem_mask = results['pts_semantic_mask']
expected_sem_mask = np.array([
13, 20, 1, 2, 6, 2, 13, 1, 13, 2, 0, 20, 5, 20, 2, 0, 1, 13, 0, 0, 0,
20, 6, 20, 13, 20, 2, 20, 20, 2, 16, 5, 13, 5, 13, 0, 20, 0, 0, 1, 7,
20, 20, 20, 20, 20, 20, 20, 0, 1, 2, 13, 16, 1, 1, 1, 6, 2, 12, 20, 3,
20, 20, 14, 1, 20, 2, 1, 7, 2, 0, 5, 20, 5, 20, 20, 3, 6, 5, 20, 0, 13,
12, 2, 20, 0, 0, 13, 20, 1, 20, 5, 3, 0, 13, 1, 2, 2, 2, 1
])
repr_str = repr(point_seg_class_mapping)
expected_repr_str = f'PointSegClassMapping(valid_cat_ids={valid_cat_ids}'\
', max_cat_id=40)'
assert np.all(mapped_sem_mask == expected_sem_mask)
assert repr_str == expected_repr_str
def test_normalize_points_color():
coord = np.array([[68.137, 3.358, 2.516], [67.697, 3.55, 2.501],
[67.649, 3.76, 2.5], [66.414, 3.901, 2.459],
[66.012, 4.085, 2.446], [65.834, 4.178, 2.44],
[65.841, 4.386, 2.44], [65.745, 4.587, 2.438],
[65.551, 4.78, 2.432], [65.486, 4.982, 2.43]])
color = np.array([[131, 95, 138], [71, 185, 253], [169, 47, 41],
[174, 161, 88], [6, 158, 213], [6, 86, 78],
[118, 161, 78], [72, 195, 138], [180, 170, 32],
[197, 85, 27]])
points = np.concatenate([coord, color], axis=1)
points = DepthPoints(
points, points_dim=6, attribute_dims=dict(color=[3, 4, 5]))
input_dict = dict(points=points)
color_mean = [100, 150, 200]
points_color_normalizer = NormalizePointsColor(color_mean=color_mean)
input_dict = points_color_normalizer(input_dict)
points = input_dict['points']
repr_str = repr(points_color_normalizer)
expected_repr_str = f'NormalizePointsColor(color_mean={color_mean})'
assert repr_str == expected_repr_str
assert np.allclose(points.coord, coord)
assert np.allclose(points.color,
(color - np.array(color_mean)[None, :]) / 255.0)

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose
def test_outdoor_aug_pipeline():
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
class_names = ['Car']
np.random.seed(0)
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
# coord sys refactor: reverse sign of yaw
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor([
[
2.16902428e+01, -4.06038128e-02, -1.61906636e+00,
1.65999997e+00, 3.20000005e+00, 1.61000001e+00, 1.53999996e+00
],
[
7.05006886e+00, -6.57459593e+00, -1.60107934e+00,
2.27999997e+00, 1.27799997e+01, 3.66000009e+00, -1.54999995e+00
],
[
2.24698811e+01, -6.69203758e+00, -1.50118136e+00,
2.31999993e+00, 1.47299995e+01, 3.64000010e+00, -1.59000003e+00
],
[
3.48291969e+01, -7.09058380e+00, -1.36622977e+00,
2.31999993e+00, 1.00400000e+01, 3.60999990e+00, -1.61000001e+00
],
[
4.62394600e+01, -7.75838804e+00, -1.32405007e+00,
2.33999991e+00, 1.28299999e+01, 3.63000011e+00, -1.63999999e+00
],
[
2.82966995e+01, -5.55755794e-01, -1.30332506e+00,
1.47000003e+00, 2.23000002e+00, 1.48000002e+00, 1.57000005e+00
],
[
2.66690197e+01, 2.18230209e+01, -1.73605704e+00,
1.55999994e+00, 3.48000002e+00, 1.39999998e+00, 1.69000006e+00
],
[
3.13197803e+01, 8.16214371e+00, -1.62177873e+00,
1.74000001e+00, 3.76999998e+00, 1.48000002e+00, -2.78999996e+00
],
[
4.34395561e+01, -1.95209332e+01, -1.20757008e+00,
1.69000006e+00, 4.09999990e+00, 1.40999997e+00, 1.53999996e+00
],
[
3.29882965e+01, -3.79360509e+00, -1.69245458e+00,
1.74000001e+00, 4.09000015e+00, 1.49000001e+00, 1.52999997e+00
],
[
3.85469360e+01, 8.35060215e+00, -1.31423414e+00,
1.59000003e+00, 4.28000021e+00, 1.45000005e+00, -1.73000002e+00
],
[
2.22492104e+01, -1.13536005e+01, -1.38272512e+00,
1.62000000e+00, 3.55999994e+00, 1.71000004e+00, -2.48000002e+00
],
[
3.36115799e+01, -1.97708054e+01, -4.92827654e-01,
1.64999998e+00, 3.54999995e+00, 1.79999995e+00, 1.57000005e+00
],
[
9.85029602e+00, -1.51294518e+00, -1.66834795e+00,
1.59000003e+00, 3.17000008e+00, 1.38999999e+00, 8.39999974e-01
]
],
dtype=torch.float32))
gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
img_fields=[])
origin_center = gt_bboxes_3d.tensor[:, :3].clone()
origin_angle = gt_bboxes_3d.tensor[:, 6].clone()
output = pipeline(results)
# manually go through the pipeline
rotation_angle = output['img_metas']._data['pcd_rotation_angle']
rotation_matrix = output['img_metas']._data['pcd_rotation']
noise_angle = torch.tensor([
0.70853819, -0.19160091, -0.71116999, 0.49571753, -0.12447527,
-0.4690133, -0.34776965, -0.65692282, -0.52442831, -0.01575567,
-0.61849673, 0.6572608, 0.30312288, -0.19182971
])
noise_trans = torch.tensor([[1.7641e+00, 4.0016e-01, 4.8937e-01],
[-1.3065e+00, 1.6581e+00, -5.9082e-02],
[-1.5504e+00, 4.1732e-01, -4.7218e-01],
[-5.2158e-01, -1.1847e+00, 4.8035e-01],
[-8.9637e-01, -1.9627e+00, 7.9241e-01],
[1.3240e-02, -1.2194e-01, 1.6953e-01],
[8.1798e-01, -2.7891e-01, 7.1578e-01],
[-4.1733e-04, 3.7416e-01, 2.0478e-01],
[1.5218e-01, -3.7413e-01, -6.7257e-03],
[-1.9138e+00, -2.2855e+00, -8.0092e-01],
[1.5933e+00, 5.6872e-01, -5.7244e-02],
[-1.8523e+00, -7.1333e-01, -8.8111e-01],
[5.2678e-01, 1.0106e-01, -1.9432e-01],
[-7.2449e-01, -8.0292e-01, -1.1334e-02]])
angle = -origin_angle - noise_angle + torch.tensor(rotation_angle)
angle -= 2 * np.pi * (angle >= np.pi)
angle += 2 * np.pi * (angle < -np.pi)
scale = output['img_metas']._data['pcd_scale_factor']
expected_tensor = torch.tensor(
[[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414],
[7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041],
[20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850],
[32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562],
[43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754],
[25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170],
[16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404],
[24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170],
[45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499],
[30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265],
[32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882],
[22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372],
[37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233],
[8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308]])
expected_tensor[:, :3] = ((
(origin_center + noise_trans) * torch.tensor([1, -1, 1]))
@ rotation_matrix) * scale
expected_tensor = torch.cat([expected_tensor, angle.unsqueeze(-1)], dim=-1)
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
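

# The two +/- 2*pi corrections above wrap the composed yaw back into
# [-pi, pi). The same normalization as a reusable sketch (works for numpy
# arrays and torch tensors alike):
def _limit_period(angle, offset=np.pi):
    return (angle + offset) % (2 * np.pi) - offset
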
def test_outdoor_velocity_aug_pipeline():
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
np.random.seed(0)
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[
-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02
],
[
-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03
],
[
-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00
],
[
-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02
],
[
-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00
],
[
-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00
],
[
-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01
],
[
-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00
],
[
-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00
],
[
-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03
]],
dtype=torch.float32),
box_dim=9)
gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
img_fields=[])
origin_center = gt_bboxes_3d.tensor[:, :3].clone()
    # clone before running the pipeline: some transforms (e.g. ObjectNoise)
    # modify the box tensor in place
    origin_angle = gt_bboxes_3d.tensor[:, 6].clone()
origin_velo = gt_bboxes_3d.tensor[:, 7:9].clone()
output = pipeline(results)
expected_tensor = torch.tensor(
[[
-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02
],
[
-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03
],
[
-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00
],
[
-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02
],
[
-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00
],
[
-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00
],
[
-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01
],
[
-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00
],
[
-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03
]])
# coord sys refactor (manually go through pipeline)
rotation_angle = output['img_metas']._data['pcd_rotation_angle']
rotation_matrix = output['img_metas']._data['pcd_rotation']
expected_tensor[:, :3] = ((origin_center @ rotation_matrix) *
output['img_metas']._data['pcd_scale_factor'] *
torch.tensor([1, -1, 1]))[[
0, 1, 2, 3, 4, 5, 6, 7, 9
]]
angle = -origin_angle - rotation_angle
angle -= 2 * np.pi * (angle >= np.pi)
angle += 2 * np.pi * (angle < -np.pi)
expected_tensor[:, 6:7] = angle.unsqueeze(-1)[[0, 1, 2, 3, 4, 5, 6, 7, 9]]
expected_tensor[:,
7:9] = ((origin_velo @ rotation_matrix[:2, :2]) *
output['img_metas']._data['pcd_scale_factor'] *
torch.tensor([1, -1]))[[0, 1, 2, 3, 4, 5, 6, 7, 9]]
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
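

# Velocities transform like BEV vectors in the manual check above: rotate with
# the 2x2 block of the point-cloud rotation, scale, and negate the y component
# under the horizontal flip. A sketch of exactly that update:
def _transform_velocity(velo, rotation_matrix, scale, horizontal_flip):
    velo = (velo @ rotation_matrix[:2, :2]) * scale
    if horizontal_flip:
        velo = velo * torch.tensor([1., -1.])
    return velo
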

# Copyright (c) OpenMMLab. All rights reserved.
import copy
import random
from os.path import dirname, exists, join
import numpy as np
import pytest
import torch
from mmdet3d.core.bbox import (CameraInstance3DBoxes, DepthInstance3DBoxes,
LiDARInstance3DBoxes)
from mmdet3d.models.builder import build_detector
def _setup_seed(seed):
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
def _get_config_directory():
"""Find the predefined detector config directory."""
try:
# Assume we are running in the source mmdetection3d repo
repo_dpath = dirname(dirname(dirname(__file__)))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet3d
repo_dpath = dirname(dirname(mmdet3d.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""Load a configuration as a python module."""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_model_cfg(fname):
"""Grab configs necessary to create a model.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
return model
def _get_detector_cfg(fname):
"""Grab configs necessary to create a detector.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
model.update(train_cfg=train_cfg)
model.update(test_cfg=test_cfg)
return model
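

# Typical use of the helpers above: fetch a deep-copied detector config, tweak
# it freely (the copy keeps other tests unaffected), then build it. The tweak
# shown is hypothetical and for illustration only:
#
#   cfg = _get_detector_cfg('votenet/votenet_16x8_sunrgbd-3d-10class.py')
#   cfg.test_cfg.sample_mod = 'seed'  # hypothetical override
#   model = build_detector(cfg)
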
def test_get_dynamic_voxelnet():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
dynamic_voxelnet_cfg = _get_model_cfg(
'dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car.py')
self = build_detector(dynamic_voxelnet_cfg).cuda()
points_0 = torch.rand([2010, 4], device='cuda')
points_1 = torch.rand([2020, 4], device='cuda')
points = [points_0, points_1]
feats = self.extract_feat(points, None)
assert feats[0].shape == torch.Size([2, 512, 200, 176])
def test_voxel_net():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
voxel_net_cfg = _get_detector_cfg(
'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')
self = build_detector(voxel_net_cfg).cuda()
points_0 = torch.rand([2010, 4], device='cuda')
points_1 = torch.rand([2020, 4], device='cuda')
points = [points_0, points_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['loss_cls'][0] >= 0
assert losses['loss_bbox'][0] >= 0
assert losses['loss_dir'][0] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape == (50, 7)
assert scores_3d.shape == torch.Size([50])
assert labels_3d.shape == torch.Size([50])
def test_3dssd():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
ssd3d_cfg = _get_detector_cfg('3dssd/3dssd_4x4_kitti-3d-car.py')
self = build_detector(ssd3d_cfg).cuda()
points_0 = torch.rand([2000, 4], device='cuda')
points_1 = torch.rand([2000, 4], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.zeros([10], device='cuda').long()
gt_labels_1 = torch.zeros([10], device='cuda').long()
gt_labels = [gt_labels_0, gt_labels_1]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['vote_loss'] >= 0
assert losses['centerness_loss'] >= 0
assert losses['center_loss'] >= 0
assert losses['dir_class_loss'] >= 0
assert losses['dir_res_loss'] >= 0
assert losses['corner_loss'] >= 0
assert losses['size_res_loss'] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_vote_net():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
vote_net_cfg = _get_detector_cfg(
'votenet/votenet_16x8_sunrgbd-3d-10class.py')
self = build_detector(vote_net_cfg).cuda()
points_0 = torch.rand([2000, 4], device='cuda')
points_1 = torch.rand([2000, 4], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 10, [10], device='cuda')
gt_labels_1 = torch.randint(0, 10, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['vote_loss'] >= 0
assert losses['objectness_loss'] >= 0
assert losses['semantic_loss'] >= 0
assert losses['center_loss'] >= 0
assert losses['dir_class_loss'] >= 0
assert losses['dir_res_loss'] >= 0
assert losses['size_class_loss'] >= 0
assert losses['size_res_loss'] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_parta2():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
parta2 = _get_detector_cfg(
'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
self = build_detector(parta2).cuda()
points_0 = torch.rand([1000, 4], device='cuda')
points_1 = torch.rand([1000, 4], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
# test_forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['loss_rpn_cls'][0] >= 0
assert losses['loss_rpn_bbox'][0] >= 0
assert losses['loss_rpn_dir'][0] >= 0
assert losses['loss_seg'] >= 0
assert losses['loss_part'] >= 0
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
# test_simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_centerpoint():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
centerpoint = _get_detector_cfg(
'centerpoint/centerpoint_0075voxel_second_secfpn_'
'dcn_4x8_cyclic_flip-tta_20e_nus.py')
self = build_detector(centerpoint).cuda()
points_0 = torch.rand([1000, 5], device='cuda')
points_1 = torch.rand([1000, 5], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(
box_type_3d=LiDARInstance3DBoxes,
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=False)
img_meta_1 = dict(
box_type_3d=LiDARInstance3DBoxes,
flip=True,
pcd_horizontal_flip=False,
pcd_vertical_flip=True)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = LiDARInstance3DBoxes(
torch.rand([10, 9], device='cuda'), box_dim=9)
gt_bbox_1 = LiDARInstance3DBoxes(
torch.rand([10, 9], device='cuda'), box_dim=9)
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
# test_forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
for key, value in losses.items():
assert value >= 0
# test_simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d_0 = results[0]['pts_bbox']['boxes_3d']
scores_3d_0 = results[0]['pts_bbox']['scores_3d']
labels_3d_0 = results[0]['pts_bbox']['labels_3d']
assert boxes_3d_0.tensor.shape[0] >= 0
assert boxes_3d_0.tensor.shape[1] == 9
assert scores_3d_0.shape[0] >= 0
assert labels_3d_0.shape[0] >= 0
boxes_3d_1 = results[1]['pts_bbox']['boxes_3d']
scores_3d_1 = results[1]['pts_bbox']['scores_3d']
labels_3d_1 = results[1]['pts_bbox']['labels_3d']
assert boxes_3d_1.tensor.shape[0] >= 0
assert boxes_3d_1.tensor.shape[1] == 9
assert scores_3d_1.shape[0] >= 0
assert labels_3d_1.shape[0] >= 0
# test_aug_test
points = [[torch.rand([1000, 5], device='cuda')]]
img_metas = [[
dict(
box_type_3d=LiDARInstance3DBoxes,
pcd_scale_factor=1.0,
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=False)
]]
with torch.no_grad():
results = self.aug_test(points, img_metas)
boxes_3d_0 = results[0]['pts_bbox']['boxes_3d']
scores_3d_0 = results[0]['pts_bbox']['scores_3d']
labels_3d_0 = results[0]['pts_bbox']['labels_3d']
assert boxes_3d_0.tensor.shape[0] >= 0
assert boxes_3d_0.tensor.shape[1] == 9
assert scores_3d_0.shape[0] >= 0
assert labels_3d_0.shape[0] >= 0
def test_fcos3d():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
fcos3d_cfg = _get_detector_cfg(
'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
self = build_detector(fcos3d_cfg).cuda()
imgs = torch.rand([1, 3, 928, 1600], dtype=torch.float32).cuda()
gt_bboxes = [torch.rand([3, 4], dtype=torch.float32).cuda()]
gt_bboxes_3d = CameraInstance3DBoxes(
torch.rand([3, 9], device='cuda'), box_dim=9)
gt_labels = [torch.randint(0, 10, [3], device='cuda')]
gt_labels_3d = gt_labels
centers2d = [torch.rand([3, 2], dtype=torch.float32).cuda()]
depths = [torch.rand([3], dtype=torch.float32).cuda()]
attr_labels = [torch.randint(0, 9, [3], device='cuda')]
img_metas = [
dict(
cam2img=[[1260.8474446004698, 0.0, 807.968244525554],
[0.0, 1260.8474446004698, 495.3344268742088],
[0.0, 0.0, 1.0]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
box_type_3d=CameraInstance3DBoxes)
]
# test forward_train
losses = self.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
gt_bboxes_3d, gt_labels_3d, centers2d, depths,
attr_labels)
assert losses['loss_cls'] >= 0
assert losses['loss_offset'] >= 0
assert losses['loss_depth'] >= 0
assert losses['loss_size'] >= 0
assert losses['loss_rotsin'] >= 0
assert losses['loss_centerness'] >= 0
assert losses['loss_velo'] >= 0
assert losses['loss_dir'] >= 0
assert losses['loss_attr'] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(imgs, img_metas)
boxes_3d = results[0]['img_bbox']['boxes_3d']
scores_3d = results[0]['img_bbox']['scores_3d']
labels_3d = results[0]['img_bbox']['labels_3d']
attrs_3d = results[0]['img_bbox']['attrs_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 9
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
assert attrs_3d.shape[0] >= 0
def test_groupfree3dnet():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
groupfree3d_cfg = _get_detector_cfg(
'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
self = build_detector(groupfree3d_cfg).cuda()
points_0 = torch.rand([50000, 3], device='cuda')
points_1 = torch.rand([50000, 3], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)
img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 18, [10], device='cuda')
gt_labels_1 = torch.randint(0, 18, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]
pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels,
pts_semantic_mask, pts_instance_mask)
assert losses['sampling_objectness_loss'] >= 0
assert losses['s5.objectness_loss'] >= 0
assert losses['s5.semantic_loss'] >= 0
assert losses['s5.center_loss'] >= 0
assert losses['s5.dir_class_loss'] >= 0
assert losses['s5.dir_res_loss'] >= 0
assert losses['s5.size_class_loss'] >= 0
assert losses['s5.size_res_loss'] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_imvoxelnet():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
imvoxelnet_cfg = _get_detector_cfg(
'imvoxelnet/imvoxelnet_4x8_kitti-3d-car.py')
self = build_detector(imvoxelnet_cfg).cuda()
imgs = torch.rand([1, 3, 384, 1280], dtype=torch.float32).cuda()
gt_bboxes_3d = [LiDARInstance3DBoxes(torch.rand([3, 7], device='cuda'))]
gt_labels_3d = [torch.zeros([3], dtype=torch.long, device='cuda')]
img_metas = [
dict(
box_type_3d=LiDARInstance3DBoxes,
lidar2img=np.array([[6.0e+02, -7.2e+02, -1.2e+00, -1.2e+02],
[1.8e+02, 7.6e+00, -7.1e+02, -1.0e+02],
[9.9e-01, 1.2e-04, 1.0e-02, -2.6e-01],
[0.0e+00, 0.0e+00, 0.0e+00, 1.0e+00]],
dtype=np.float32),
img_shape=(384, 1272, 3))
]
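    # lidar2img is the full 4x4 LiDAR-to-image projection, i.e. the camera
    # intrinsics composed with the LiDAR-to-camera extrinsics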
# test forward_train
losses = self.forward_train(imgs, img_metas, gt_bboxes_3d, gt_labels_3d)
assert losses['loss_cls'][0] >= 0
assert losses['loss_bbox'][0] >= 0
assert losses['loss_dir'][0] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(imgs, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_point_rcnn():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_cfg = _get_detector_cfg(
'point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_detector(pointrcnn_cfg).cuda()
points_0 = torch.rand([1000, 4], device='cuda')
points_1 = torch.rand([1000, 4], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
# test_forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['bbox_loss'] >= 0
assert losses['semantic_loss'] >= 0
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
def test_smoke():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
smoke_cfg = _get_detector_cfg(
'smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py')
self = build_detector(smoke_cfg).cuda()
imgs = torch.rand([1, 3, 384, 1280], dtype=torch.float32).cuda()
gt_bboxes = [
torch.Tensor([[563.63122442, 175.02195182, 614.81298184, 224.97763099],
[480.89676358, 179.86272635, 511.53017463, 202.54645962],
[541.48322272, 175.73767011, 564.55208966, 193.95009791],
[329.51448848, 176.14566789, 354.24670848,
213.82599081]]).cuda()
]
gt_bboxes_3d = [
CameraInstance3DBoxes(
torch.Tensor([[-0.69, 1.69, 25.01, 3.20, 1.61, 1.66, -1.59],
[-7.43, 1.88, 47.55, 3.70, 1.40, 1.51, 1.55],
[-4.71, 1.71, 60.52, 4.05, 1.46, 1.66, 1.56],
[-12.63, 1.88, 34.09, 1.95, 1.72, 0.50,
1.54]]).cuda(),
box_dim=7)
]
gt_labels = [torch.tensor([0, 0, 0, 1]).cuda()]
gt_labels_3d = gt_labels
centers2d = [
torch.Tensor([[589.6528477, 198.3862263], [496.8143155, 190.75967182],
[553.40528354, 184.53785991],
[342.23690317, 194.44298819]]).cuda()
]
    # `depths` is not actually used in the SMOKE head loss computation, so
    # its length need not match the four GT boxes above
    depths = [torch.rand([3], dtype=torch.float32).cuda()]
attr_labels = None
img_metas = [
dict(
cam2img=[[721.5377, 0., 609.5593, 0.], [0., 721.5377, 172.854, 0.],
[0., 0., 1., 0.], [0., 0., 0., 1.]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
pad_shape=[384, 1280],
trans_mat=np.array([[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
dtype=np.float32),
affine_aug=False,
box_type_3d=CameraInstance3DBoxes)
]
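    # trans_mat (scale 0.25) maps padded-image coordinates onto the
    # quarter-resolution heatmap that SMOKE predicts on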
# test forward_train
losses = self.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
gt_bboxes_3d, gt_labels_3d, centers2d, depths,
attr_labels)
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(imgs, img_metas)
boxes_3d = results[0]['img_bbox']['boxes_3d']
scores_3d = results[0]['img_bbox']['scores_3d']
labels_3d = results[0]['img_bbox']['labels_3d']
assert boxes_3d.tensor.shape[0] >= 0
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_sassd():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
sassd_cfg = _get_detector_cfg('sassd/sassd_6x8_80e_kitti-3d-3class.py')
self = build_detector(sassd_cfg).cuda()
points_0 = torch.rand([2010, 4], device='cuda')
points_1 = torch.rand([2020, 4], device='cuda')
points = [points_0, points_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['loss_cls'][0] >= 0
assert losses['loss_bbox'][0] >= 0
assert losses['loss_dir'][0] >= 0
assert losses['aux_loss_cls'][0] >= 0
assert losses['aux_loss_reg'][0] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape == (50, 7)
assert scores_3d.shape == torch.Size([50])
assert labels_3d.shape == torch.Size([50])
# Copyright (c) OpenMMLab. All rights reserved.
"""Test model forward process.
CommandLine:
pytest tests/test_models/test_forward.py
xdoctest tests/test_models/test_forward.py zero
"""
import copy
from os.path import dirname, exists, join
import numpy as np
import torch
def _get_config_directory():
"""Find the predefined detector config directory."""
try:
# Assume we are running in the source mmdetection3d repo
repo_dpath = dirname(dirname(dirname(__file__)))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet3d
repo_dpath = dirname(dirname(mmdet3d.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""Load a configuration as a python module."""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_detector_cfg(fname):
"""Grab configs necessary to create a detector.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
return model
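# Usage sketch: grab a config and tweak the copy safely, e.g. with a config
# shipped under configs/ such as 'votenet/votenet_8x8_scannet-3d-18class.py':
#   model = _get_detector_cfg('votenet/votenet_8x8_scannet-3d-18class.py')
#   model['pretrained'] = None  # the deep copy keeps other tests unaffected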
def _test_two_stage_forward(cfg_file):
model = _get_detector_cfg(cfg_file)
model['pretrained'] = None
from mmdet.models import build_detector
detector = build_detector(model)
input_shape = (1, 3, 256, 256)
# Test forward train with a non-empty truth batch
mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
imgs = mm_inputs.pop('imgs')
img_metas = mm_inputs.pop('img_metas')
gt_bboxes = mm_inputs['gt_bboxes']
gt_labels = mm_inputs['gt_labels']
gt_masks = mm_inputs['gt_masks']
losses = detector.forward(
imgs,
img_metas,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
gt_masks=gt_masks,
return_loss=True)
assert isinstance(losses, dict)
loss, _ = detector._parse_losses(losses)
loss.requires_grad_(True)
assert float(loss.item()) > 0
loss.backward()
# Test forward train with an empty truth batch
mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
imgs = mm_inputs.pop('imgs')
img_metas = mm_inputs.pop('img_metas')
gt_bboxes = mm_inputs['gt_bboxes']
gt_labels = mm_inputs['gt_labels']
gt_masks = mm_inputs['gt_masks']
losses = detector.forward(
imgs,
img_metas,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
gt_masks=gt_masks,
return_loss=True)
assert isinstance(losses, dict)
loss, _ = detector._parse_losses(losses)
assert float(loss.item()) > 0
loss.backward()
# Test forward test
with torch.no_grad():
img_list = [g[None, :] for g in imgs]
batch_results = []
for one_img, one_meta in zip(img_list, img_metas):
result = detector.forward([one_img], [[one_meta]],
return_loss=False)
batch_results.append(result)
def _test_single_stage_forward(cfg_file):
model = _get_detector_cfg(cfg_file)
model['pretrained'] = None
from mmdet.models import build_detector
detector = build_detector(model)
input_shape = (1, 3, 300, 300)
mm_inputs = _demo_mm_inputs(input_shape)
imgs = mm_inputs.pop('imgs')
img_metas = mm_inputs.pop('img_metas')
# Test forward train
gt_bboxes = mm_inputs['gt_bboxes']
gt_labels = mm_inputs['gt_labels']
losses = detector.forward(
imgs,
img_metas,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
return_loss=True)
assert isinstance(losses, dict)
loss, _ = detector._parse_losses(losses)
assert float(loss.item()) > 0
# Test forward test
with torch.no_grad():
img_list = [g[None, :] for g in imgs]
batch_results = []
for one_img, one_meta in zip(img_list, img_metas):
result = detector.forward([one_img], [[one_meta]],
return_loss=False)
batch_results.append(result)
def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
num_items=None, num_classes=10): # yapf: disable
"""Create a superset of inputs needed to run test or train batches.
Args:
input_shape (tuple):
input batch dimensions
num_items (List[int]):
specifies the number of boxes in each batch item
num_classes (int):
number of different labels a box might have
"""
from mmdet.core import BitmapMasks
(N, C, H, W) = input_shape
rng = np.random.RandomState(0)
imgs = rng.rand(*input_shape)
img_metas = [{
'img_shape': (H, W, C),
'ori_shape': (H, W, C),
'pad_shape': (H, W, C),
'filename': '<demo>.png',
'scale_factor': 1.0,
'flip': False,
} for _ in range(N)]
gt_bboxes = []
gt_labels = []
gt_masks = []
for batch_idx in range(N):
if num_items is None:
num_boxes = rng.randint(1, 10)
else:
num_boxes = num_items[batch_idx]
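        # convert normalized (cx, cy, w, h) boxes to clipped (x1, y1, x2, y2)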
cx, cy, bw, bh = rng.rand(num_boxes, 4).T
tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
br_y = ((cy * H) + (H * bh / 2)).clip(0, H)
boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
class_idxs = rng.randint(1, num_classes, size=num_boxes)
gt_bboxes.append(torch.FloatTensor(boxes))
gt_labels.append(torch.LongTensor(class_idxs))
        # use the seeded rng so the demo masks are reproducible
        mask = rng.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
gt_masks.append(BitmapMasks(mask, H, W))
mm_inputs = {
'imgs': torch.FloatTensor(imgs).requires_grad_(True),
'img_metas': img_metas,
'gt_bboxes': gt_bboxes,
'gt_labels': gt_labels,
'gt_bboxes_ignore': None,
'gt_masks': gt_masks,
}
return mm_inputs
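# Usage sketch: a 2-image batch whose second image has no ground truth,
#   mm_inputs = _demo_mm_inputs((2, 3, 300, 300), num_items=[5, 0])
# 'imgs' is then a (2, 3, 300, 300) float tensor and mm_inputs['gt_bboxes'][1]
# is an empty (0, 4) tensor, which exercises the empty-GT code paths above.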
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmcv.cnn.bricks import ConvModule
from mmdet3d.models.builder import build_head
def test_dgcnn_decode_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
dgcnn_decode_head_cfg = dict(
type='DGCNNHead',
fp_channels=(1024, 512),
channels=256,
num_classes=13,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
ignore_index=13)
self = build_head(dgcnn_decode_head_cfg)
self.cuda()
assert isinstance(self.conv_seg, torch.nn.Conv1d)
assert self.conv_seg.in_channels == 256
assert self.conv_seg.out_channels == 13
assert self.conv_seg.kernel_size == (1, )
assert isinstance(self.pre_seg_conv, ConvModule)
assert isinstance(self.pre_seg_conv.conv, torch.nn.Conv1d)
assert self.pre_seg_conv.conv.in_channels == 512
assert self.pre_seg_conv.conv.out_channels == 256
assert self.pre_seg_conv.conv.kernel_size == (1, )
assert isinstance(self.pre_seg_conv.bn, torch.nn.BatchNorm1d)
assert self.pre_seg_conv.bn.num_features == 256
# test forward
fa_points = torch.rand(2, 4096, 1024).float().cuda()
input_dict = dict(fa_points=fa_points)
seg_logits = self(input_dict)
assert seg_logits.shape == torch.Size([2, 13, 4096])
# test loss
pts_semantic_mask = torch.randint(0, 13, (2, 4096)).long().cuda()
losses = self.losses(seg_logits, pts_semantic_mask)
assert losses['loss_sem_seg'].item() > 0
# test loss with ignore_index
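    # when every point carries the ignore label, no point contributes to the
    # cross-entropy and the loss collapses to exactly zero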
ignore_index_mask = torch.ones_like(pts_semantic_mask) * 13
losses = self.losses(seg_logits, ignore_index_mask)
assert losses['loss_sem_seg'].item() == 0
# test loss with class_weight
dgcnn_decode_head_cfg['loss_decode'] = dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=np.random.rand(13),
loss_weight=1.0)
self = build_head(dgcnn_decode_head_cfg)
self.cuda()
losses = self.losses(seg_logits, pts_semantic_mask)
assert losses['loss_sem_seg'].item() > 0
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import random
from os.path import dirname, exists, join
import mmcv
import numpy as np
import pytest
import torch
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.models.builder import build_head
from mmdet.apis import set_random_seed
def _setup_seed(seed):
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
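    # cudnn.deterministic trades speed for reproducibility; combined with the
    # seeds above it makes the random tensors in these tests repeatable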
def _get_config_directory():
"""Find the predefined detector config directory."""
try:
# Assume we are running in the source mmdetection3d repo
repo_dpath = dirname(dirname(dirname(dirname(__file__))))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet3d
repo_dpath = dirname(dirname(mmdet3d.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""Load a configuration as a python module."""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_head_cfg(fname):
"""Grab configs necessary to create a bbox_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
bbox_head = model.bbox_head
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
return bbox_head
def _get_rpn_head_cfg(fname):
"""Grab configs necessary to create a rpn_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn_proposal
def _get_roi_head_cfg(fname):
"""Grab configs necessary to create a roi_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
roi_head = model.roi_head
roi_head.update(train_cfg=train_cfg.rcnn)
roi_head.update(test_cfg=test_cfg.rcnn)
return roi_head
def _get_pts_bbox_head_cfg(fname):
"""Grab configs necessary to create a pts_bbox_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg.pts))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg.pts))
pts_bbox_head = model.pts_bbox_head
pts_bbox_head.update(train_cfg=train_cfg)
pts_bbox_head.update(test_cfg=test_cfg)
return pts_bbox_head
def _get_pointrcnn_rpn_head_cfg(fname):
"""Grab configs necessary to create a rpn_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn
def _get_vote_head_cfg(fname):
"""Grab configs necessary to create a vote_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
vote_head = model.bbox_head
vote_head.update(train_cfg=train_cfg)
vote_head.update(test_cfg=test_cfg)
return vote_head
def _get_parta2_bbox_head_cfg(fname):
"""Grab configs necessary to create a parta2_bbox_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
vote_head = model.roi_head.bbox_head
return vote_head
def _get_pointrcnn_bbox_head_cfg(fname):
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
vote_head = model.roi_head.bbox_head
return vote_head
def test_anchor3d_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')
self = build_head(bbox_head_cfg)
self.cuda()
assert isinstance(self.conv_cls, torch.nn.modules.conv.Conv2d)
assert self.conv_cls.in_channels == 512
assert self.conv_cls.out_channels == 18
assert self.conv_reg.out_channels == 42
assert self.conv_dir_cls.out_channels == 12
# test forward
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
assert cls_score[0].shape == torch.Size([2, 18, 200, 176])
assert bbox_pred[0].shape == torch.Size([2, 42, 200, 176])
assert dir_cls_preds[0].shape == torch.Size([2, 12, 200, 176])
# test loss
gt_bboxes = list(
torch.tensor(
[[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
dtype=torch.float32).cuda())
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls'][0] > 0
assert losses['loss_bbox'][0] > 0
assert losses['loss_dir'][0] > 0
# test empty ground truth case
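    # with no boxes every anchor is a negative sample: the classification loss
    # stays positive while the box and direction losses have nothing to regress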
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls'][0] > 0
assert empty_gt_losses['loss_bbox'][0] == 0
assert empty_gt_losses['loss_dir'][0] == 0
def test_anchor3d_head_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')
self = build_head(bbox_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
    # lower the logits so fewer boxes pass score filtering;
    # too many positive samples may cause CUDA OOM
    cls_score[0] -= 1.5
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas)
assert (result_list[0][1] > 0.3).all()
def test_parta2_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
self = build_head(rpn_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
    # lower the logits so fewer boxes pass score filtering;
    # too many positive samples may cause CUDA OOM
    cls_score[0] -= 1.5
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas, proposal_cfg)
assert result_list[0]['scores_3d'].shape == torch.Size([512])
assert result_list[0]['labels_3d'].shape == torch.Size([512])
assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])
def test_point_rcnn_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_pointrcnn_rpn_head_cfg(
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(rpn_head_cfg)
self.cuda()
fp_features = torch.rand([2, 128, 1024], dtype=torch.float32).cuda()
feats = {'fp_features': fp_features}
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(bbox_preds, cls_preds) = self.forward(feats)
assert bbox_preds.shape == (2, 1024, 8)
assert cls_preds.shape == (2, 1024, 3)
points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
result_list = self.get_bboxes(points, bbox_preds, cls_preds, input_metas)
max_num = proposal_cfg.nms_cfg.nms_post
bbox, score_selected, labels, cls_preds_selected = result_list[0]
assert bbox.tensor.shape == (max_num, 7)
assert score_selected.shape == (max_num, )
assert labels.shape == (max_num, )
assert cls_preds_selected.shape == (max_num, 3)
def test_vote_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
vote_head_cfg = _get_vote_head_cfg(
'votenet/votenet_8x8_scannet-3d-18class.py')
self = build_head(vote_head_cfg).cuda()
fp_xyz = [torch.rand([2, 256, 3], dtype=torch.float32).cuda()]
fp_features = [torch.rand([2, 256, 256], dtype=torch.float32).cuda()]
fp_indices = [torch.randint(0, 128, [2, 256]).cuda()]
input_dict = dict(
fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)
# test forward
ret_dict = self(input_dict, 'vote')
assert ret_dict['center'].shape == torch.Size([2, 256, 3])
assert ret_dict['obj_scores'].shape == torch.Size([2, 256, 2])
assert ret_dict['size_res'].shape == torch.Size([2, 256, 18, 3])
assert ret_dict['dir_res'].shape == torch.Size([2, 256, 1])
# test loss
points = [torch.rand([40000, 4], device='cuda') for i in range(2)]
gt_bbox1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox2 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox1, gt_bbox2]
gt_labels = [torch.randint(0, 18, [10], device='cuda') for i in range(2)]
pts_semantic_mask = [
torch.randint(0, 18, [40000], device='cuda') for i in range(2)
]
pts_instance_mask = [
torch.randint(0, 10, [40000], device='cuda') for i in range(2)
]
losses = self.loss(ret_dict, points, gt_bboxes, gt_labels,
pts_semantic_mask, pts_instance_mask)
assert losses['vote_loss'] >= 0
assert losses['objectness_loss'] >= 0
assert losses['semantic_loss'] >= 0
assert losses['center_loss'] >= 0
assert losses['dir_class_loss'] >= 0
assert losses['dir_res_loss'] >= 0
assert losses['size_class_loss'] >= 0
assert losses['size_res_loss'] >= 0
# test multiclass_nms_single
obj_scores = torch.rand([256], device='cuda')
sem_scores = torch.rand([256, 18], device='cuda')
points = torch.rand([40000, 3], device='cuda')
bbox = torch.rand([256, 7], device='cuda')
input_meta = dict(box_type_3d=DepthInstance3DBoxes)
bbox_selected, score_selected, labels = self.multiclass_nms_single(
obj_scores, sem_scores, bbox, points, input_meta)
assert bbox_selected.shape[0] >= 0
assert bbox_selected.shape[1] == 7
assert score_selected.shape[0] >= 0
assert labels.shape[0] >= 0
# test get_boxes
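    # fake a complete VoteHead prediction dict; the shapes mirror the forward
    # outputs checked above (256 proposals, 18 size classes, 1 heading bin)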
points = torch.rand([1, 40000, 4], device='cuda')
seed_points = torch.rand([1, 1024, 3], device='cuda')
seed_indices = torch.randint(0, 40000, [1, 1024], device='cuda')
vote_points = torch.rand([1, 1024, 3], device='cuda')
vote_features = torch.rand([1, 256, 1024], device='cuda')
aggregated_points = torch.rand([1, 256, 3], device='cuda')
    # torch.range is deprecated and inclusive (257 values); arange yields 256
    aggregated_indices = torch.arange(0, 256, device='cuda')
obj_scores = torch.rand([1, 256, 2], device='cuda')
center = torch.rand([1, 256, 3], device='cuda')
dir_class = torch.rand([1, 256, 1], device='cuda')
dir_res_norm = torch.rand([1, 256, 1], device='cuda')
dir_res = torch.rand([1, 256, 1], device='cuda')
size_class = torch.rand([1, 256, 18], device='cuda')
size_res = torch.rand([1, 256, 18, 3], device='cuda')
sem_scores = torch.rand([1, 256, 18], device='cuda')
bbox_preds = dict(
seed_points=seed_points,
seed_indices=seed_indices,
vote_points=vote_points,
vote_features=vote_features,
aggregated_points=aggregated_points,
aggregated_indices=aggregated_indices,
obj_scores=obj_scores,
center=center,
dir_class=dir_class,
dir_res_norm=dir_res_norm,
dir_res=dir_res,
size_class=size_class,
size_res=size_res,
sem_scores=sem_scores)
results = self.get_bboxes(points, bbox_preds, [input_meta])
assert results[0][0].tensor.shape[0] >= 0
assert results[0][0].tensor.shape[1] == 7
assert results[0][1].shape[0] >= 0
assert results[0][2].shape[0] >= 0
def test_smoke_mono3d_head():
head_cfg = dict(
type='SMOKEMono3DHead',
num_classes=3,
in_channels=64,
dim_channel=[3, 4, 5],
ori_channel=[6, 7],
stacked_convs=0,
feat_channels=64,
use_direction_classifier=False,
diff_rad_by_sin=False,
pred_attrs=False,
pred_velo=False,
dir_offset=0,
strides=None,
group_reg_dims=(8, ),
cls_branch=(256, ),
reg_branch=((256, ), ),
num_attrs=0,
bbox_code_size=7,
dir_branch=(),
attr_branch=(),
bbox_coder=dict(
type='SMOKECoder',
base_depth=(28.01, 16.32),
base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,
1.53)),
code_size=7),
loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),
loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_attr=None,
conv_bias=True,
dcn_on_last_conv=False)
self = build_head(head_cfg)
feats = [torch.rand([2, 64, 32, 32], dtype=torch.float32)]
# test forward
ret_dict = self(feats)
assert len(ret_dict) == 2
assert len(ret_dict[0]) == 1
assert ret_dict[0][0].shape == torch.Size([2, 3, 32, 32])
assert ret_dict[1][0].shape == torch.Size([2, 8, 32, 32])
# test loss
gt_bboxes = [
torch.Tensor([[1.0, 2.0, 20.0, 40.0], [45.0, 50.0, 80.0, 70.1],
[34.0, 39.0, 65.0, 64.0]]),
torch.Tensor([[11.0, 22.0, 29.0, 31.0], [41.0, 55.0, 60.0, 99.0],
[29.0, 29.0, 65.0, 56.0]])
]
gt_bboxes_3d = [
CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7),
CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7)
]
gt_labels = [torch.randint(0, 3, [3]) for i in range(2)]
gt_labels_3d = gt_labels
centers2d = [torch.randint(0, 60, (3, 2)), torch.randint(0, 40, (3, 2))]
depths = [
torch.rand([3], dtype=torch.float32),
torch.rand([3], dtype=torch.float32)
]
attr_labels = None
img_metas = [
dict(
cam2img=[[1260.8474446004698, 0.0, 807.968244525554, 40.1111],
[0.0, 1260.8474446004698, 495.3344268742088, 2.34422],
[0.0, 0.0, 1.0, 0.00333333], [0.0, 0.0, 0.0, 1.0]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
pad_shape=[128, 128],
trans_mat=np.array([[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
dtype=np.float32),
affine_aug=False,
box_type_3d=CameraInstance3DBoxes) for i in range(2)
]
losses = self.loss(*ret_dict, gt_bboxes, gt_labels, gt_bboxes_3d,
gt_labels_3d, centers2d, depths, attr_labels, img_metas)
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
# test get_boxes
results = self.get_bboxes(*ret_dict, img_metas)
assert len(results) == 2
assert len(results[0]) == 4
assert results[0][0].tensor.shape == torch.Size([100, 7])
assert results[0][1].shape == torch.Size([100])
assert results[0][2].shape == torch.Size([100])
assert results[0][3] is None
def test_parta2_bbox_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
parta2_bbox_head_cfg = _get_parta2_bbox_head_cfg(
'./parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
self = build_head(parta2_bbox_head_cfg).cuda()
seg_feats = torch.rand([256, 14, 14, 14, 16]).cuda()
part_feats = torch.rand([256, 14, 14, 14, 4]).cuda()
cls_score, bbox_pred = self.forward(seg_feats, part_feats)
assert cls_score.shape == (256, 1)
assert bbox_pred.shape == (256, 7)
def test_point_rcnn_bbox_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_bbox_head_cfg = _get_pointrcnn_bbox_head_cfg(
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(pointrcnn_bbox_head_cfg).cuda()
feats = torch.rand([100, 512, 133]).cuda()
rcnn_cls, rcnn_reg = self.forward(feats)
assert rcnn_cls.shape == (100, 1)
assert rcnn_reg.shape == (100, 7)
def test_part_aggregation_ROI_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roi_head_cfg = _get_roi_head_cfg(
'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
self = build_head(roi_head_cfg).cuda()
features = np.load('./tests/test_samples/parta2_roihead_inputs.npz')
seg_features = torch.tensor(
features['seg_features'], dtype=torch.float32, device='cuda')
feats_dict = dict(seg_features=seg_features)
voxels = torch.tensor(
features['voxels'], dtype=torch.float32, device='cuda')
num_points = torch.ones([500], device='cuda')
coors = torch.zeros([500, 4], device='cuda')
voxel_centers = torch.zeros([500, 3], device='cuda')
box_type_3d = LiDARInstance3DBoxes
img_metas = [dict(box_type_3d=box_type_3d)]
voxels_dict = dict(
voxels=voxels,
num_points=num_points,
coors=coors,
voxel_centers=voxel_centers)
pred_bboxes = LiDARInstance3DBoxes(
torch.tensor(
[[0.3990, 0.5167, 0.0249, 0.9401, 0.9459, 0.7967, 0.4150],
[0.8203, 0.2290, 0.9096, 0.1183, 0.0752, 0.4092, 0.9601],
[0.2093, 0.1940, 0.8909, 0.4387, 0.3570, 0.5454, 0.8299],
[0.2099, 0.7684, 0.4290, 0.2117, 0.6606, 0.1654, 0.4250],
[0.9927, 0.6964, 0.2472, 0.7028, 0.7494, 0.9303, 0.0494]],
dtype=torch.float32,
device='cuda'))
pred_scores = torch.tensor([0.9722, 0.7910, 0.4690, 0.3300, 0.3345],
dtype=torch.float32,
device='cuda')
pred_labels = torch.tensor([0, 1, 0, 2, 1],
dtype=torch.int64,
device='cuda')
pred_clses = torch.tensor(
[[0.7874, 0.1344, 0.2190], [0.8193, 0.6969, 0.7304],
[0.2328, 0.9028, 0.3900], [0.6177, 0.5012, 0.2330],
[0.8985, 0.4894, 0.7152]],
dtype=torch.float32,
device='cuda')
proposal = dict(
boxes_3d=pred_bboxes,
scores_3d=pred_scores,
labels_3d=pred_labels,
cls_preds=pred_clses)
proposal_list = [proposal]
gt_bboxes_3d = [LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))]
gt_labels_3d = [torch.randint(0, 3, [5], device='cuda')]
losses = self.forward_train(feats_dict, voxels_dict, {}, proposal_list,
gt_bboxes_3d, gt_labels_3d)
assert losses['loss_seg'] >= 0
assert losses['loss_part'] >= 0
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
bbox_results = self.simple_test(feats_dict, voxels_dict, img_metas,
proposal_list)
boxes_3d = bbox_results[0]['boxes_3d']
scores_3d = bbox_results[0]['scores_3d']
labels_3d = bbox_results[0]['labels_3d']
assert boxes_3d.tensor.shape == (12, 7)
assert scores_3d.shape == (12, )
assert labels_3d.shape == (12, )
def test_point_rcnn_roi_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roi_head_cfg = _get_roi_head_cfg(
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(roi_head_cfg).cuda()
features = torch.rand([3, 128, 16384]).cuda()
points = torch.rand([3, 16384, 3]).cuda()
points_cls_preds = torch.rand([3, 16384, 3]).cuda()
rcnn_feats = {
'features': features,
'points': points,
'points_cls_preds': points_cls_preds
}
boxes_3d = LiDARInstance3DBoxes(torch.rand(50, 7).cuda())
labels_3d = torch.randint(low=0, high=2, size=[50]).cuda()
proposal = {'boxes_3d': boxes_3d, 'labels_3d': labels_3d}
proposal_list = [proposal for i in range(3)]
gt_bboxes_3d = [
LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))
for i in range(3)
]
gt_labels_3d = [torch.randint(0, 2, [5], device='cuda') for i in range(3)]
box_type_3d = LiDARInstance3DBoxes
img_metas = [dict(box_type_3d=box_type_3d) for i in range(3)]
losses = self.forward_train(rcnn_feats, img_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d)
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
bbox_results = self.simple_test(rcnn_feats, img_metas, proposal_list)
boxes_3d = bbox_results[0]['boxes_3d']
scores_3d = bbox_results[0]['scores_3d']
labels_3d = bbox_results[0]['labels_3d']
assert boxes_3d.tensor.shape[1] == 7
assert boxes_3d.tensor.shape[0] == scores_3d.shape[0]
assert scores_3d.shape[0] == labels_3d.shape[0]
def test_free_anchor_3D_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
pts_bbox_head_cfg = _get_pts_bbox_head_cfg(
'./free_anchor/hv_pointpillars_fpn_sbn-all_'
'free-anchor_4x8_2x_nus-3d.py')
self = build_head(pts_bbox_head_cfg)
cls_scores = [
torch.rand([4, 80, 200, 200], device='cuda') for i in range(3)
]
bbox_preds = [
torch.rand([4, 72, 200, 200], device='cuda') for i in range(3)
]
dir_cls_preds = [
torch.rand([4, 16, 200, 200], device='cuda') for i in range(3)
]
gt_bboxes = [
LiDARInstance3DBoxes(torch.rand([8, 9], device='cuda'), box_dim=9)
for i in range(4)
]
gt_labels = [
torch.randint(0, 10, [8], device='cuda', dtype=torch.long)
for i in range(4)
]
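    # placeholder metas: FreeAnchor's loss does not appear to read them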
input_metas = [0]
losses = self.loss(cls_scores, bbox_preds, dir_cls_preds, gt_bboxes,
gt_labels, input_metas, None)
assert losses['positive_bag_loss'] >= 0
assert losses['negative_bag_loss'] >= 0
def test_primitive_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
primitive_head_cfg = dict(
type='PrimitiveHead',
num_dims=2,
num_classes=18,
primitive_mode='z',
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=64,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=1.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
self = build_head(primitive_head_cfg).cuda()
fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()]
hd_features = torch.rand([2, 256, 64], dtype=torch.float32).cuda()
fp_indices = [torch.randint(0, 64, [2, 64]).cuda()]
input_dict = dict(
fp_xyz_net0=fp_xyz, hd_feature=hd_features, fp_indices_net0=fp_indices)
# test forward
ret_dict = self(input_dict, 'vote')
assert ret_dict['center_z'].shape == torch.Size([2, 64, 3])
assert ret_dict['size_residuals_z'].shape == torch.Size([2, 64, 2])
assert ret_dict['sem_cls_scores_z'].shape == torch.Size([2, 64, 18])
assert ret_dict['aggregated_points_z'].shape == torch.Size([2, 64, 3])
# test loss
points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
ret_dict['seed_points'] = fp_xyz[0]
ret_dict['seed_indices'] = fp_indices[0]
gt_bboxes_3d = [
DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
]
gt_labels_3d = torch.randint(0, 18, [2, 4]).cuda()
gt_labels_3d = [gt_labels_3d[0], gt_labels_3d[1]]
pts_semantic_mask = torch.randint(0, 19, [2, 1024]).cuda()
pts_semantic_mask = [pts_semantic_mask[0], pts_semantic_mask[1]]
pts_instance_mask = torch.randint(0, 4, [2, 1024]).cuda()
pts_instance_mask = [pts_instance_mask[0], pts_instance_mask[1]]
loss_input_dict = dict(
bbox_preds=ret_dict,
points=points,
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
pts_semantic_mask=pts_semantic_mask,
pts_instance_mask=pts_instance_mask)
losses_dict = self.loss(**loss_input_dict)
assert losses_dict['flag_loss_z'] >= 0
assert losses_dict['vote_loss_z'] >= 0
assert losses_dict['center_loss_z'] >= 0
assert losses_dict['size_loss_z'] >= 0
assert losses_dict['sem_loss_z'] >= 0
    # 'primitive_mode' should be one of ['z', 'xy', 'line']
    with pytest.raises(AssertionError):
        primitive_head_cfg['primitive_mode'] = 'xyz'
        build_head(primitive_head_cfg)
def test_h3d_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
h3d_head_cfg = _get_roi_head_cfg('h3dnet/h3dnet_3x8_scannet-3d-18class.py')
num_point = 128
num_proposal = 64
h3d_head_cfg.primitive_list[0].vote_aggregation_cfg.num_point = num_point
h3d_head_cfg.primitive_list[1].vote_aggregation_cfg.num_point = num_point
h3d_head_cfg.primitive_list[2].vote_aggregation_cfg.num_point = num_point
h3d_head_cfg.bbox_head.num_proposal = num_proposal
self = build_head(h3d_head_cfg).cuda()
# prepare RoI outputs
fp_xyz = [torch.rand([1, num_point, 3], dtype=torch.float32).cuda()]
hd_features = torch.rand([1, 256, num_point], dtype=torch.float32).cuda()
fp_indices = [torch.randint(0, 128, [1, num_point]).cuda()]
aggregated_points = torch.rand([1, num_proposal, 3],
dtype=torch.float32).cuda()
aggregated_features = torch.rand([1, 128, num_proposal],
dtype=torch.float32).cuda()
proposal_list = torch.cat([
torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4 - 2,
torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4,
torch.zeros([1, num_proposal, 1]).cuda()
],
dim=-1)
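    # proposals: random centers in [-2, 2), sizes in [0, 4), yaw fixed to zero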
input_dict = dict(
fp_xyz_net0=fp_xyz,
hd_feature=hd_features,
aggregated_points=aggregated_points,
aggregated_features=aggregated_features,
seed_points=fp_xyz[0],
seed_indices=fp_indices[0],
proposal_list=proposal_list)
# prepare gt label
gt_bboxes_3d = [
DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
]
gt_labels_3d = torch.randint(0, 18, [1, 4]).cuda()
gt_labels_3d = [gt_labels_3d[0]]
pts_semantic_mask = torch.randint(0, 19, [1, num_point]).cuda()
pts_semantic_mask = [pts_semantic_mask[0]]
pts_instance_mask = torch.randint(0, 4, [1, num_point]).cuda()
pts_instance_mask = [pts_instance_mask[0]]
points = torch.rand([1, num_point, 3], dtype=torch.float32).cuda()
# prepare rpn targets
vote_targets = torch.rand([1, num_point, 9], dtype=torch.float32).cuda()
vote_target_masks = torch.rand([1, num_point], dtype=torch.float32).cuda()
size_class_targets = torch.rand([1, num_proposal],
dtype=torch.float32).cuda().long()
size_res_targets = torch.rand([1, num_proposal, 3],
dtype=torch.float32).cuda()
dir_class_targets = torch.rand([1, num_proposal],
dtype=torch.float32).cuda().long()
dir_res_targets = torch.rand([1, num_proposal], dtype=torch.float32).cuda()
center_targets = torch.rand([1, 4, 3], dtype=torch.float32).cuda()
mask_targets = torch.rand([1, num_proposal],
dtype=torch.float32).cuda().long()
valid_gt_masks = torch.rand([1, 4], dtype=torch.float32).cuda()
objectness_targets = torch.rand([1, num_proposal],
dtype=torch.float32).cuda().long()
objectness_weights = torch.rand([1, num_proposal],
dtype=torch.float32).cuda()
box_loss_weights = torch.rand([1, num_proposal],
dtype=torch.float32).cuda()
valid_gt_weights = torch.rand([1, 4], dtype=torch.float32).cuda()
targets = (vote_targets, vote_target_masks, size_class_targets,
size_res_targets, dir_class_targets, dir_res_targets,
center_targets, None, mask_targets, valid_gt_masks,
objectness_targets, objectness_weights, box_loss_weights,
valid_gt_weights)
input_dict['targets'] = targets
# train forward
ret_dict = self.forward_train(
input_dict,
points=points,
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
pts_semantic_mask=pts_semantic_mask,
pts_instance_mask=pts_instance_mask,
img_metas=None)
assert ret_dict['flag_loss_z'] >= 0
assert ret_dict['vote_loss_z'] >= 0
assert ret_dict['center_loss_z'] >= 0
assert ret_dict['size_loss_z'] >= 0
assert ret_dict['sem_loss_z'] >= 0
assert ret_dict['objectness_loss_optimized'] >= 0
assert ret_dict['primitive_sem_matching_loss'] >= 0
def test_center_head():
tasks = [
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
]
bbox_cfg = dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=8,
voxel_size=[0.2, 0.2])
train_cfg = dict(
grid_size=[1024, 1024, 40],
point_cloud_range=[-51.2, -51.2, -5., 51.2, 51.2, 3.],
voxel_size=[0.1, 0.1, 0.2],
out_size_factor=8,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0],
min_radius=2)
test_cfg = dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
post_max_size=83,
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=8,
voxel_size=[0.2, 0.2],
nms_type='circle')
center_head_cfg = dict(
type='CenterHead',
in_channels=sum([256, 256]),
tasks=tasks,
train_cfg=train_cfg,
test_cfg=test_cfg,
bbox_coder=bbox_cfg,
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
share_conv_channel=64,
norm_bbox=True)
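    # each common_heads entry maps a branch name to
    # (num_output_channels, num_conv_layers) for that prediction branch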
center_head = build_head(center_head_cfg)
x = torch.rand([2, 512, 128, 128])
output = center_head([x])
for i in range(6):
assert output[i][0]['reg'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['height'].shape == torch.Size([2, 1, 128, 128])
assert output[i][0]['dim'].shape == torch.Size([2, 3, 128, 128])
assert output[i][0]['rot'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['vel'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['heatmap'].shape == torch.Size(
[2, tasks[i]['num_class'], 128, 128])
# test get_bboxes
img_metas = [
dict(box_type_3d=LiDARInstance3DBoxes),
dict(box_type_3d=LiDARInstance3DBoxes)
]
ret_lists = center_head.get_bboxes(output, img_metas)
for ret_list in ret_lists:
assert ret_list[0].tensor.shape[0] <= 500
assert ret_list[1].shape[0] <= 500
assert ret_list[2].shape[0] <= 500
def test_dcn_center_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and CUDA')
set_random_seed(0)
tasks = [
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
]
voxel_size = [0.2, 0.2, 8]
dcn_center_head_cfg = dict(
type='CenterHead',
in_channels=sum([128, 128, 128]),
tasks=[
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
],
common_heads={
'reg': (2, 2),
'height': (1, 2),
'dim': (3, 2),
'rot': (2, 2),
'vel': (2, 2)
},
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=4,
voxel_size=voxel_size[:2],
code_size=9),
separate_head=dict(
type='DCNSeparateHead',
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4,
bias=False), # mmcv 1.2.6 doesn't support bias=True anymore
init_bias=-2.19,
final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='none', loss_weight=0.25),
norm_bbox=True)
# model training and testing settings
train_cfg = dict(
grid_size=[512, 512, 1],
point_cloud_range=[-51.2, -51.2, -5., 51.2, 51.2, 3.],
voxel_size=voxel_size,
out_size_factor=4,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0])
test_cfg = dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
post_max_size=83,
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=4,
voxel_size=voxel_size[:2],
nms_type='circle')
dcn_center_head_cfg.update(train_cfg=train_cfg, test_cfg=test_cfg)
dcn_center_head = build_head(dcn_center_head_cfg).cuda()
x = torch.ones([2, 384, 128, 128]).cuda()
output = dcn_center_head([x])
for i in range(6):
assert output[i][0]['reg'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['height'].shape == torch.Size([2, 1, 128, 128])
assert output[i][0]['dim'].shape == torch.Size([2, 3, 128, 128])
assert output[i][0]['rot'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['vel'].shape == torch.Size([2, 2, 128, 128])
assert output[i][0]['heatmap'].shape == torch.Size(
[2, tasks[i]['num_class'], 128, 128])
# Test loss.
gt_bboxes_0 = LiDARInstance3DBoxes(torch.rand([10, 9]).cuda(), box_dim=9)
gt_bboxes_1 = LiDARInstance3DBoxes(torch.rand([20, 9]).cuda(), box_dim=9)
gt_labels_0 = torch.randint(1, 11, [10]).cuda()
gt_labels_1 = torch.randint(1, 11, [20]).cuda()
gt_bboxes_3d = [gt_bboxes_0, gt_bboxes_1]
gt_labels_3d = [gt_labels_0, gt_labels_1]
loss = dcn_center_head.loss(gt_bboxes_3d, gt_labels_3d, output)
for key, item in loss.items():
if 'heatmap' in key:
assert item >= 0
else:
assert torch.sum(item) >= 0
# test get_bboxes
img_metas = [
dict(box_type_3d=LiDARInstance3DBoxes),
dict(box_type_3d=LiDARInstance3DBoxes)
]
ret_lists = dcn_center_head.get_bboxes(output, img_metas)
for ret_list in ret_lists:
assert ret_list[0].tensor.shape[0] <= 500
assert ret_list[1].shape[0] <= 500
assert ret_list[2].shape[0] <= 500
def test_ssd3d_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
ssd3d_head_cfg = _get_vote_head_cfg('3dssd/3dssd_4x4_kitti-3d-car.py')
ssd3d_head_cfg.vote_module_cfg.num_points = 64
self = build_head(ssd3d_head_cfg).cuda()
sa_xyz = [torch.rand([2, 128, 3], dtype=torch.float32).cuda()]
sa_features = [torch.rand([2, 256, 128], dtype=torch.float32).cuda()]
sa_indices = [torch.randint(0, 64, [2, 128]).cuda()]
input_dict = dict(
sa_xyz=sa_xyz, sa_features=sa_features, sa_indices=sa_indices)
# test forward
ret_dict = self(input_dict, 'spec')
assert ret_dict['center'].shape == torch.Size([2, 64, 3])
assert ret_dict['obj_scores'].shape == torch.Size([2, 1, 64])
assert ret_dict['size'].shape == torch.Size([2, 64, 3])
assert ret_dict['dir_res'].shape == torch.Size([2, 64, 12])
# test loss
points = [torch.rand([4000, 3], device='cuda') for i in range(2)]
gt_bbox1 = LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))
gt_bbox2 = LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))
gt_bboxes = [gt_bbox1, gt_bbox2]
gt_labels = [
torch.zeros([5], dtype=torch.long, device='cuda') for i in range(2)
]
img_metas = [dict(box_type_3d=LiDARInstance3DBoxes) for i in range(2)]
losses = self.loss(
ret_dict, points, gt_bboxes, gt_labels, img_metas=img_metas)
assert losses['centerness_loss'] >= 0
assert losses['center_loss'] >= 0
assert losses['dir_class_loss'] >= 0
assert losses['dir_res_loss'] >= 0
assert losses['size_res_loss'] >= 0
assert losses['corner_loss'] >= 0
assert losses['vote_loss'] >= 0
# test multiclass_nms_single
sem_scores = ret_dict['obj_scores'].transpose(1, 2)[0]
obj_scores = sem_scores.max(-1)[0]
bbox = self.bbox_coder.decode(ret_dict)[0]
input_meta = img_metas[0]
bbox_selected, score_selected, labels = self.multiclass_nms_single(
obj_scores, sem_scores, bbox, points[0], input_meta)
assert bbox_selected.shape[0] >= 0
assert bbox_selected.shape[1] == 7
assert score_selected.shape[0] >= 0
assert labels.shape[0] >= 0
# test get_boxes
points = torch.stack(points, 0)
results = self.get_bboxes(points, ret_dict, img_metas)
assert results[0][0].tensor.shape[0] >= 0
assert results[0][0].tensor.shape[1] == 7
assert results[0][1].shape[0] >= 0
assert results[0][2].shape[0] >= 0
def test_shape_aware_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_pts_bbox_head_cfg(
'ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py')
# modify bn config to avoid bugs caused by syncbn
for task in bbox_head_cfg['tasks']:
task['norm_cfg'] = dict(type='BN2d')
self = build_head(bbox_head_cfg)
self.cuda()
assert len(self.heads) == 4
assert isinstance(self.heads[0].conv_cls, torch.nn.modules.conv.Conv2d)
assert self.heads[0].conv_cls.in_channels == 64
assert self.heads[0].conv_cls.out_channels == 36
assert self.heads[0].conv_reg.out_channels == 28
assert self.heads[0].conv_dir_cls.out_channels == 8
# test forward
feats = list()
feats.append(torch.rand([2, 384, 200, 200], dtype=torch.float32).cuda())
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
assert cls_score[0].shape == torch.Size([2, 420000, 9])
assert bbox_pred[0].shape == torch.Size([2, 420000, 7])
assert dir_cls_preds[0].shape == torch.Size([2, 420000, 2])
# test loss
gt_bboxes = [
LiDARInstance3DBoxes(
torch.tensor(
[[-14.5695, -6.4169, -2.1054, 1.8830, 4.6720, 1.4840, 1.5587],
[25.7215, 3.4581, -1.3456, 1.6720, 4.4090, 1.5830, 1.5301]],
dtype=torch.float32).cuda()),
LiDARInstance3DBoxes(
torch.tensor(
[[-50.763, -3.5517, -0.99658, 1.7430, 4.4020, 1.6990, 1.7874],
[-68.720, 0.033, -0.75276, 1.7860, 4.9100, 1.6610, 1.7525]],
dtype=torch.float32).cuda())
]
gt_labels = list(torch.tensor([[4, 4], [4, 4]], dtype=torch.int64).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls'][0] > 0
assert losses['loss_bbox'][0] > 0
assert losses['loss_dir'][0] > 0
# test empty ground truth case
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls'][0] > 0
assert empty_gt_losses['loss_bbox'][0] == 0
assert empty_gt_losses['loss_dir'][0] == 0
def test_shape_aware_head_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_pts_bbox_head_cfg(
'ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py')
# modify bn config to avoid bugs caused by syncbn
for task in bbox_head_cfg['tasks']:
task['norm_cfg'] = dict(type='BN2d')
self = build_head(bbox_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 384, 200, 200], dtype=torch.float32).cuda())
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_bboxes
    # lower the logits so fewer boxes pass score filtering;
    # too many positive samples may cause CUDA OOM
    cls_score[0] -= 1.5
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas)
assert len(result_list[0][1]) > 0 # ensure not all boxes are filtered
assert (result_list[0][1] > 0.3).all()
def test_fcos_mono3d_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
fcos3d_head_cfg = _get_head_cfg(
'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
self = build_head(fcos3d_head_cfg).cuda()
feats = [
torch.rand([2, 256, 116, 200], dtype=torch.float32).cuda(),
torch.rand([2, 256, 58, 100], dtype=torch.float32).cuda(),
torch.rand([2, 256, 29, 50], dtype=torch.float32).cuda(),
torch.rand([2, 256, 15, 25], dtype=torch.float32).cuda(),
torch.rand([2, 256, 8, 13], dtype=torch.float32).cuda()
]
# test forward
ret_dict = self(feats)
assert len(ret_dict) == 5
assert len(ret_dict[0]) == 5
assert ret_dict[0][0].shape == torch.Size([2, 10, 116, 200])
# test loss
gt_bboxes = [
torch.rand([3, 4], dtype=torch.float32).cuda(),
torch.rand([3, 4], dtype=torch.float32).cuda()
]
gt_bboxes_3d = CameraInstance3DBoxes(
torch.rand([3, 9], device='cuda'), box_dim=9)
gt_labels = [torch.randint(0, 10, [3], device='cuda') for i in range(2)]
gt_labels_3d = gt_labels
centers2d = [
torch.rand([3, 2], dtype=torch.float32).cuda(),
torch.rand([3, 2], dtype=torch.float32).cuda()
]
depths = [
torch.rand([3], dtype=torch.float32).cuda(),
torch.rand([3], dtype=torch.float32).cuda()
]
attr_labels = [torch.randint(0, 9, [3], device='cuda') for i in range(2)]
img_metas = [
dict(
cam2img=[[1260.8474446004698, 0.0, 807.968244525554],
[0.0, 1260.8474446004698, 495.3344268742088],
[0.0, 0.0, 1.0]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
box_type_3d=CameraInstance3DBoxes) for i in range(2)
]
losses = self.loss(*ret_dict, gt_bboxes, gt_labels, gt_bboxes_3d,
gt_labels_3d, centers2d, depths, attr_labels, img_metas)
assert losses['loss_cls'] >= 0
assert losses['loss_offset'] >= 0
assert losses['loss_depth'] >= 0
assert losses['loss_size'] >= 0
assert losses['loss_rotsin'] >= 0
assert losses['loss_centerness'] >= 0
assert losses['loss_velo'] >= 0
assert losses['loss_dir'] >= 0
assert losses['loss_attr'] >= 0
# test get_boxes
results = self.get_bboxes(*ret_dict, img_metas)
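    # For the nuScenes mono3d setting each per-image result carries 9-dim 3D
    # boxes (7 box params + 2D velocity), scores, labels and attribute labels.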
assert len(results) == 2
assert len(results[0]) == 4
assert results[0][0].tensor.shape == torch.Size([200, 9])
assert results[0][1].shape == torch.Size([200])
assert results[0][2].shape == torch.Size([200])
assert results[0][3].shape == torch.Size([200])

def test_groupfree3d_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
vote_head_cfg = _get_vote_head_cfg(
'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
self = build_head(vote_head_cfg).cuda()
fp_xyz = [torch.rand([2, 256, 3], dtype=torch.float32).cuda()]
fp_features = [torch.rand([2, 288, 256], dtype=torch.float32).cuda()]
fp_indices = [torch.randint(0, 128, [2, 256]).cuda()]
input_dict = dict(
fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)
# test forward
ret_dict = self(input_dict, 'kps')
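    # 'kps' is the sampling mode for the initial object candidates; keys
    # prefixed with 's5.' come from the last stage of the 6-layer (L6) decoder.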
assert ret_dict['seeds_obj_cls_logits'].shape == torch.Size([2, 1, 256])
assert ret_dict['s5.center'].shape == torch.Size([2, 256, 3])
assert ret_dict['s5.dir_class'].shape == torch.Size([2, 256, 1])
assert ret_dict['s5.dir_res'].shape == torch.Size([2, 256, 1])
assert ret_dict['s5.size_class'].shape == torch.Size([2, 256, 18])
assert ret_dict['s5.size_res'].shape == torch.Size([2, 256, 18, 3])
assert ret_dict['s5.obj_scores'].shape == torch.Size([2, 256, 1])
assert ret_dict['s5.sem_scores'].shape == torch.Size([2, 256, 18])
# test losses
points = [torch.rand([5000, 4], device='cuda') for i in range(2)]
gt_bbox1 = torch.rand([10, 7], dtype=torch.float32).cuda()
gt_bbox2 = torch.rand([10, 7], dtype=torch.float32).cuda()
gt_bbox1 = DepthInstance3DBoxes(gt_bbox1)
gt_bbox2 = DepthInstance3DBoxes(gt_bbox2)
gt_bboxes = [gt_bbox1, gt_bbox2]
pts_instance_mask_1 = torch.randint(0, 10, [5000], device='cuda')
pts_instance_mask_2 = torch.randint(0, 10, [5000], device='cuda')
pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]
pts_semantic_mask_1 = torch.randint(0, 19, [5000], device='cuda')
pts_semantic_mask_2 = torch.randint(0, 19, [5000], device='cuda')
pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]
labels_1 = torch.randint(0, 18, [10], device='cuda')
labels_2 = torch.randint(0, 18, [10], device='cuda')
gt_labels = [labels_1, labels_2]
losses = self.loss(ret_dict, points, gt_bboxes, gt_labels,
pts_semantic_mask, pts_instance_mask)
assert losses['s5.objectness_loss'] >= 0
assert losses['s5.semantic_loss'] >= 0
assert losses['s5.center_loss'] >= 0
assert losses['s5.dir_class_loss'] >= 0
assert losses['s5.dir_res_loss'] >= 0
assert losses['s5.size_class_loss'] >= 0
assert losses['s5.size_res_loss'] >= 0
# test multiclass_nms_single
obj_scores = torch.rand([256], device='cuda')
sem_scores = torch.rand([256, 18], device='cuda')
points = torch.rand([5000, 3], device='cuda')
bbox = torch.rand([256, 7], device='cuda')
input_meta = dict(box_type_3d=DepthInstance3DBoxes)
bbox_selected, score_selected, labels = \
self.multiclass_nms_single(obj_scores,
sem_scores,
bbox,
points,
input_meta)
assert bbox_selected.shape[0] >= 0
assert bbox_selected.shape[1] == 7
assert score_selected.shape[0] >= 0
assert labels.shape[0] >= 0
# test get_boxes
points = torch.rand([1, 5000, 3], device='cuda')
seed_points = torch.rand([1, 1024, 3], device='cuda')
seed_indices = torch.randint(0, 5000, [1, 1024], device='cuda')
obj_scores = torch.rand([1, 256, 1], device='cuda')
center = torch.rand([1, 256, 3], device='cuda')
dir_class = torch.rand([1, 256, 1], device='cuda')
dir_res_norm = torch.rand([1, 256, 1], device='cuda')
dir_res = torch.rand([1, 256, 1], device='cuda')
size_class = torch.rand([1, 256, 18], device='cuda')
size_res = torch.rand([1, 256, 18, 3], device='cuda')
sem_scores = torch.rand([1, 256, 18], device='cuda')
bbox_preds = dict()
bbox_preds['seed_points'] = seed_points
bbox_preds['seed_indices'] = seed_indices
bbox_preds['s5.obj_scores'] = obj_scores
bbox_preds['s5.center'] = center
bbox_preds['s5.dir_class'] = dir_class
bbox_preds['s5.dir_res_norm'] = dir_res_norm
bbox_preds['s5.dir_res'] = dir_res
bbox_preds['s5.size_class'] = size_class
bbox_preds['s5.size_res'] = size_res
bbox_preds['s5.sem_scores'] = sem_scores
self.test_cfg['prediction_stages'] = 'last'
results = self.get_bboxes(points, bbox_preds, [input_meta])
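    # Results are (bboxes, scores, labels) per sample; the box count depends
    # on how many candidates survive NMS, hence the loose shape checks below.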
assert results[0][0].tensor.shape[0] >= 0
assert results[0][0].tensor.shape[1] == 7
assert results[0][1].shape[0] >= 0
assert results[0][2].shape[0] >= 0

def test_pgd_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
pgd_head_cfg = _get_head_cfg(
'pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py')
self = build_head(pgd_head_cfg).cuda()
feats = [
torch.rand([2, 256, 96, 312], dtype=torch.float32).cuda(),
torch.rand([2, 256, 48, 156], dtype=torch.float32).cuda(),
torch.rand([2, 256, 24, 78], dtype=torch.float32).cuda(),
torch.rand([2, 256, 12, 39], dtype=torch.float32).cuda(),
]
# test forward
ret_dict = self(feats)
assert len(ret_dict) == 7
assert len(ret_dict[0]) == 4
assert ret_dict[0][0].shape == torch.Size([2, 3, 96, 312])
# test loss
gt_bboxes = [
torch.rand([3, 4], dtype=torch.float32).cuda(),
torch.rand([3, 4], dtype=torch.float32).cuda()
]
gt_bboxes_3d = CameraInstance3DBoxes(
torch.rand([3, 7], device='cuda'), box_dim=7)
gt_labels = [torch.randint(0, 3, [3], device='cuda') for i in range(2)]
gt_labels_3d = gt_labels
centers2d = [
torch.rand([3, 2], dtype=torch.float32).cuda(),
torch.rand([3, 2], dtype=torch.float32).cuda()
]
depths = [
torch.rand([3], dtype=torch.float32).cuda(),
torch.rand([3], dtype=torch.float32).cuda()
]
attr_labels = None
img_metas = [
dict(
img_shape=[384, 1248],
cam2img=[[721.5377, 0.0, 609.5593, 44.85728],
[0.0, 721.5377, 172.854, 0.2163791],
[0.0, 0.0, 1.0, 0.002745884], [0.0, 0.0, 0.0, 1.0]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
box_type_3d=CameraInstance3DBoxes) for i in range(2)
]
losses = self.loss(*ret_dict, gt_bboxes, gt_labels, gt_bboxes_3d,
gt_labels_3d, centers2d, depths, attr_labels, img_metas)
assert losses['loss_cls'] >= 0
assert losses['loss_offset'] >= 0
assert losses['loss_depth'] >= 0
assert losses['loss_size'] >= 0
assert losses['loss_rotsin'] >= 0
assert losses['loss_centerness'] >= 0
assert losses['loss_kpts'] >= 0
assert losses['loss_bbox2d'] >= 0
assert losses['loss_consistency'] >= 0
assert losses['loss_dir'] >= 0
# test get_boxes
results = self.get_bboxes(*ret_dict, img_metas)
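    # Besides (bboxes_3d, scores, labels), PGD returns an attribute slot
    # (None here, no attributes for KITTI) and 2D boxes with a score column.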
assert len(results) == 2
assert len(results[0]) == 5
assert results[0][0].tensor.shape == torch.Size([20, 7])
assert results[0][1].shape == torch.Size([20])
assert results[0][2].shape == torch.Size([20])
assert results[0][3] is None
assert results[0][4].shape == torch.Size([20, 5])

def test_monoflex_head():
head_cfg = dict(
type='MonoFlexHead',
num_classes=3,
in_channels=64,
use_edge_fusion=True,
edge_fusion_inds=[(1, 0)],
edge_heatmap_ratio=1 / 8,
stacked_convs=0,
feat_channels=64,
use_direction_classifier=False,
diff_rad_by_sin=False,
pred_attrs=False,
pred_velo=False,
dir_offset=0,
strides=None,
group_reg_dims=((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ),
(1, )),
cls_branch=(256, ),
reg_branch=((256, ), (256, ), (256, ), (256, ), (256, ), (256, ),
(256, ), (256, )),
num_attrs=0,
bbox_code_size=7,
dir_branch=(),
attr_branch=(),
bbox_coder=dict(
type='MonoFlexCoder',
depth_mode='exp',
base_depth=(26.494627, 16.05988),
depth_range=[0.1, 100],
combine_depth=True,
uncertainty_range=[-10, 10],
base_dims=((3.8840, 1.5261, 1.6286, 0.4259, 0.1367, 0.1022),
(0.8423, 1.7607, 0.6602, 0.2349, 0.1133, 0.1427),
(1.7635, 1.7372, 0.5968, 0.1766, 0.0948, 0.1242)),
dims_mode='linear',
multibin=True,
num_dir_bins=4,
bin_centers=[0, np.pi / 2, np.pi, -np.pi / 2],
bin_margin=np.pi / 6,
code_size=7),
conv_bias=True,
dcn_on_last_conv=False)
self = build_head(head_cfg)
feats = [torch.rand([2, 64, 32, 32], dtype=torch.float32)]
input_metas = [
dict(img_shape=(110, 110), pad_shape=(128, 128)),
dict(img_shape=(98, 110), pad_shape=(128, 128))
]
cls_score, out_reg = self(feats, input_metas)
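    # Sanity check: the 50 regression channels are the sum over all
    # group_reg_dims entries, 4 + 2 + 20 + 3 + 3 + (8 + 8) + 1 + 1 = 50,
    # while the cls channels equal num_classes.
    assert sum(sum(t) for t in head_cfg['group_reg_dims']) == 50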
assert cls_score[0].shape == torch.Size([2, 3, 32, 32])
assert out_reg[0].shape == torch.Size([2, 50, 32, 32])

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmcv.cnn.bricks import ConvModule

from mmdet3d.models.builder import build_head

def test_paconv_decode_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
paconv_decode_head_cfg = dict(
type='PAConvHead',
fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
(128 + 6, 128, 128, 128)),
channels=128,
num_classes=20,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
ignore_index=20)
self = build_head(paconv_decode_head_cfg)
self.cuda()
assert isinstance(self.conv_seg, torch.nn.Conv1d)
assert self.conv_seg.in_channels == 128
assert self.conv_seg.out_channels == 20
assert self.conv_seg.kernel_size == (1, )
assert isinstance(self.pre_seg_conv, ConvModule)
assert isinstance(self.pre_seg_conv.conv, torch.nn.Conv1d)
assert self.pre_seg_conv.conv.in_channels == 128
assert self.pre_seg_conv.conv.out_channels == 128
assert self.pre_seg_conv.conv.kernel_size == (1, )
assert isinstance(self.pre_seg_conv.bn, torch.nn.BatchNorm1d)
assert self.pre_seg_conv.bn.num_features == 128
assert isinstance(self.pre_seg_conv.activate, torch.nn.ReLU)
# test forward
sa_xyz = [
torch.rand(2, 4096, 3).float().cuda(),
torch.rand(2, 1024, 3).float().cuda(),
torch.rand(2, 256, 3).float().cuda(),
torch.rand(2, 64, 3).float().cuda(),
torch.rand(2, 16, 3).float().cuda(),
]
sa_features = [
torch.rand(2, 6, 4096).float().cuda(),
torch.rand(2, 64, 1024).float().cuda(),
torch.rand(2, 128, 256).float().cuda(),
torch.rand(2, 256, 64).float().cuda(),
torch.rand(2, 512, 16).float().cuda(),
]
input_dict = dict(sa_xyz=sa_xyz, sa_features=sa_features)
seg_logits = self(input_dict)
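    # The head decodes back to the densest level: 20 class logits for each of
    # the 4096 input points.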
assert seg_logits.shape == torch.Size([2, 20, 4096])
# test loss
pts_semantic_mask = torch.randint(0, 20, (2, 4096)).long().cuda()
losses = self.losses(seg_logits, pts_semantic_mask)
assert losses['loss_sem_seg'].item() > 0
# test loss with ignore_index
ignore_index_mask = torch.ones_like(pts_semantic_mask) * 20
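    # When every point is labelled with ignore_index (20), the cross-entropy
    # loss has no valid targets and reduces to zero.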
losses = self.losses(seg_logits, ignore_index_mask)
assert losses['loss_sem_seg'].item() == 0
# test loss with class_weight
paconv_decode_head_cfg['loss_decode'] = dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=np.random.rand(20),
loss_weight=1.0)
self = build_head(paconv_decode_head_cfg)
self.cuda()
losses = self.losses(seg_logits, pts_semantic_mask)
assert losses['loss_sem_seg'].item() > 0

# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmcv import Config
from mmcv.ops import SubMConv3d
from torch.nn import BatchNorm1d, ReLU

from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
from mmdet3d.models import PartA2BboxHead
from mmdet3d.ops import make_sparse_convmodule

def test_loss():
self = PartA2BboxHead(
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256])
cls_score = torch.Tensor([[-3.6810], [-3.9413], [-5.3971], [-17.1281],
[-5.9434], [-6.2251]])
bbox_pred = torch.Tensor(
[[
-6.3016e-03, -5.2294e-03, -1.2793e-02, -1.0602e-02, -7.4086e-04,
9.2471e-03, 7.3514e-03
],
[
-1.1975e-02, -1.1578e-02, -3.1219e-02, 2.7754e-02, 6.9775e-03,
9.4042e-04, 9.0472e-04
],
[
3.7539e-03, -9.1897e-03, -5.3666e-03, -1.0380e-05, 4.3467e-03,
4.2470e-03, 1.8355e-03
],
[
-7.6093e-02, -1.2497e-01, -9.2942e-02, 2.1404e-02, 2.3750e-02,
1.0365e-01, -1.3042e-02
],
[
2.7577e-03, -1.1514e-02, -1.1097e-02, -2.4946e-03, 2.3268e-03,
1.6797e-03, -1.4076e-03
],
[
3.9635e-03, -7.8551e-03, -3.5125e-03, 2.1229e-04, 9.7042e-03,
1.7499e-03, -5.1254e-03
]])
rois = torch.Tensor([
[0.0000, 13.3711, -12.5483, -1.9306, 1.7027, 4.2836, 1.4283, -1.1499],
[0.0000, 19.2472, -7.2655, -10.6641, 3.3078, 83.1976, 29.3337, 2.4501],
[0.0000, 13.8012, -10.9791, -3.0617, 0.2504, 1.2518, 0.8807, 3.1034],
[0.0000, 16.2736, -9.0284, -2.0494, 8.2697, 31.2336, 9.1006, 1.9208],
[0.0000, 10.4462, -13.6879, -3.1869, 7.3366, 0.3518, 1.7199, -0.7225],
[0.0000, 11.3374, -13.6671, -3.2332, 4.9934, 0.3750, 1.6033, -0.9665]
])
labels = torch.Tensor([0.7100, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
bbox_targets = torch.Tensor(
[[0.0598, 0.0243, -0.0984, -0.0454, 0.0066, 0.1114, 0.1714]])
pos_gt_bboxes = torch.Tensor(
[[13.6686, -12.5586, -2.1553, 1.6271, 4.3119, 1.5966, 2.1631]])
reg_mask = torch.Tensor([1, 0, 0, 0, 0, 0])
label_weights = torch.Tensor(
[0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078])
bbox_weights = torch.Tensor([1., 0., 0., 0., 0., 0.])
loss = self.loss(cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights)
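    # The expected values below are fixed references for the hand-crafted
    # inputs above; allclose is called with rtol=1e-3 to absorb minor drift.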
expected_loss_cls = torch.Tensor([
2.0579e-02, 1.5005e-04, 3.5252e-05, 0.0000e+00, 2.0433e-05, 1.5422e-05
])
expected_loss_bbox = torch.as_tensor(0.0622)
expected_loss_corner = torch.Tensor([0.1374])
assert torch.allclose(loss['loss_cls'], expected_loss_cls, 1e-3)
assert torch.allclose(loss['loss_bbox'], expected_loss_bbox, 1e-3)
assert torch.allclose(loss['loss_corner'], expected_loss_corner, 1e-3)

def test_get_targets():
self = PartA2BboxHead(
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256])
sampling_result = IoUNegPiecewiseSampler(
1,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
return_iou=True)
sampling_result.pos_bboxes = torch.Tensor(
[[8.1517, 0.0384, -1.9496, 1.5271, 4.1131, 1.4879, 1.2076]])
sampling_result.pos_gt_bboxes = torch.Tensor(
[[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])
sampling_result.iou = torch.Tensor([
6.7787e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 1.2839e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 7.0261e-04, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 5.8915e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.6628e-06,
5.0271e-02, 0.0000e+00, 1.9608e-01, 0.0000e+00, 0.0000e+00, 2.3519e-01,
1.6589e-02, 0.0000e+00, 1.0162e-01, 2.1634e-02, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.6326e-02,
1.3810e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
4.5455e-02, 0.0000e+00, 1.0929e-03, 0.0000e+00, 8.8191e-02, 1.1012e-01,
0.0000e+00, 0.0000e+00, 0.0000e+00, 1.6236e-01, 0.0000e+00, 1.1342e-01,
1.0636e-01, 9.9803e-02, 5.7394e-02, 0.0000e+00, 1.6773e-01, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.3464e-03,
0.0000e+00, 2.7977e-01, 0.0000e+00, 3.1252e-01, 2.1642e-01, 2.2945e-01,
0.0000e+00, 1.8297e-01, 0.0000e+00, 2.1908e-01, 1.1661e-01, 1.3513e-01,
1.5898e-01, 7.4368e-03, 1.2523e-01, 1.4735e-04, 0.0000e+00, 0.0000e+00,
0.0000e+00, 1.0948e-01, 2.5889e-01, 4.4585e-04, 8.6483e-02, 1.6376e-01,
0.0000e+00, 2.2894e-01, 2.7489e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
1.8334e-01, 1.0193e-01, 2.3389e-01, 1.1035e-01, 3.3700e-01, 1.4397e-01,
1.0379e-01, 0.0000e+00, 1.1226e-01, 0.0000e+00, 0.0000e+00, 1.6201e-01,
0.0000e+00, 1.3569e-01
])
rcnn_train_cfg = Config({
'assigner': [{
'type': 'MaxIoUAssigner',
'iou_calculator': {
'type': 'BboxOverlaps3D',
'coordinate': 'lidar'
},
'pos_iou_thr': 0.55,
'neg_iou_thr': 0.55,
'min_pos_iou': 0.55,
'ignore_iof_thr': -1
}, {
'type': 'MaxIoUAssigner',
'iou_calculator': {
'type': 'BboxOverlaps3D',
'coordinate': 'lidar'
},
'pos_iou_thr': 0.55,
'neg_iou_thr': 0.55,
'min_pos_iou': 0.55,
'ignore_iof_thr': -1
}, {
'type': 'MaxIoUAssigner',
'iou_calculator': {
'type': 'BboxOverlaps3D',
'coordinate': 'lidar'
},
'pos_iou_thr': 0.55,
'neg_iou_thr': 0.55,
'min_pos_iou': 0.55,
'ignore_iof_thr': -1
}],
'sampler': {
'type': 'IoUNegPiecewiseSampler',
'num': 128,
'pos_fraction': 0.55,
'neg_piece_fractions': [0.8, 0.2],
'neg_iou_piece_thrs': [0.55, 0.1],
'neg_pos_ub': -1,
'add_gt_as_proposals': False,
'return_iou': True
},
'cls_pos_thr':
0.75,
'cls_neg_thr':
0.25
})
label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, bbox_weights\
= self.get_targets([sampling_result], rcnn_train_cfg)
expected_label = torch.Tensor([
0.8557, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0595, 0.0000, 0.1250, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0178, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0498, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.1740, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000
])
expected_bbox_targets = torch.Tensor(
[[-0.0632, 0.0516, 0.0047, 0.0542, -0.2252, 0.0299, -0.1495]])
expected_pos_gt_bboxes = torch.Tensor(
[[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])
expected_reg_mask = torch.LongTensor([
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
])
expected_label_weights = torch.Tensor([
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,
0.0078, 0.0078
])
expected_bbox_weights = torch.Tensor([
1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0.
])
assert torch.allclose(label, expected_label, 1e-2)
assert torch.allclose(bbox_targets, expected_bbox_targets, 1e-2)
assert torch.allclose(pos_gt_bboxes, expected_pos_gt_bboxes)
assert torch.all(reg_mask == expected_reg_mask)
assert torch.allclose(label_weights, expected_label_weights, 1e-2)
assert torch.allclose(bbox_weights, expected_bbox_weights)

def test_get_bboxes():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
self = PartA2BboxHead(
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256])
rois = torch.Tensor([[
0.0000e+00, 5.6284e+01, 2.5712e+01, -1.3196e+00, 1.5943e+00,
3.7509e+00, 1.4969e+00, 1.2105e-03
],
[
0.0000e+00, 5.4685e+01, 2.9132e+01, -1.9178e+00,
1.6337e+00, 4.1116e+00, 1.5472e+00, -1.7312e+00
],
[
0.0000e+00, 5.5927e+01, 2.5830e+01, -1.4099e+00,
1.5958e+00, 3.8861e+00, 1.4911e+00, -2.9276e+00
],
[
0.0000e+00, 5.6306e+01, 2.6310e+01, -1.3729e+00,
1.5893e+00, 3.7448e+00, 1.4924e+00, 1.6071e-01
],
[
0.0000e+00, 3.1633e+01, -5.8557e+00, -1.2541e+00,
1.6517e+00, 4.1829e+00, 1.5593e+00, -1.6037e+00
],
[
0.0000e+00, 3.1789e+01, -5.5308e+00, -1.3012e+00,
1.6412e+00, 4.1070e+00, 1.5487e+00, -1.6517e+00
]]).cuda()
cls_score = torch.Tensor([[-2.2061], [-2.1121], [-1.4478], [-2.9614],
[-0.1761], [0.7357]]).cuda()
bbox_pred = torch.Tensor(
[[
-4.7917e-02, -1.6504e-02, -2.2340e-02, 5.1296e-03, -2.0984e-02,
1.0598e-02, -1.1907e-01
],
[
-1.6261e-02, -5.4005e-02, 6.2480e-03, 1.5496e-03, -1.3285e-02,
8.1482e-03, -2.2707e-03
],
[
-3.9423e-02, 2.0151e-02, -2.1138e-02, -1.1845e-03, -1.5343e-02,
5.7208e-03, 8.5646e-03
],
[
6.3104e-02, -3.9307e-02, 2.3005e-02, -7.0528e-03, -9.2637e-05,
2.2656e-02, 1.6358e-02
],
[
-1.4864e-03, 5.6840e-02, 5.8247e-03, -3.5541e-03, -4.9658e-03,
2.5036e-03, 3.0302e-02
],
[
-4.3259e-02, -1.9963e-02, 3.5004e-02, 3.7546e-03, 1.0876e-02,
-3.9637e-04, 2.0445e-02
]]).cuda()
class_labels = [torch.Tensor([2, 2, 2, 2, 2, 2]).cuda()]
class_pred = [
torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],
[1.3105e-05, 1.1904e-05, 2.4432e-01],
[1.4530e-05, 1.4619e-05, 2.4395e-01],
[1.3251e-05, 1.3038e-05, 2.3703e-01],
[2.9156e-05, 2.5521e-05, 2.2826e-01],
[3.1665e-05, 2.9054e-05, 2.2077e-01]]).cuda()
]
cfg = Config(
dict(
use_rotate_nms=True,
use_raw_score=True,
nms_thr=0.01,
score_thr=0.1))
input_meta = dict(
box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)
result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels,
class_pred, [input_meta], cfg)
selected_bboxes, selected_scores, selected_label_preds = result_list[0]
expected_selected_bboxes = torch.Tensor(
[[56.0888, 25.6445, -1.3610, 1.6025, 3.6730, 1.5128, -0.1179],
[54.4606, 29.2412, -1.9145, 1.6362, 4.0573, 1.5599, -1.7335],
[31.8887, -5.8574, -1.2470, 1.6458, 4.1622, 1.5632, -1.5734]]).cuda()
expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda()
expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda()
assert torch.allclose(selected_bboxes.tensor, expected_selected_bboxes,
1e-3)
assert torch.allclose(selected_scores, expected_selected_scores, 1e-3)
assert torch.allclose(selected_label_preds, expected_selected_label_preds)

def test_multi_class_nms():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
self = PartA2BboxHead(
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256])
box_probs = torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],
[1.3105e-05, 1.1904e-05, 2.4432e-01],
[1.4530e-05, 1.4619e-05, 2.4395e-01],
[1.3251e-05, 1.3038e-05, 2.3703e-01],
[2.9156e-05, 2.5521e-05, 2.2826e-01],
[3.1665e-05, 2.9054e-05, 2.2077e-01],
[5.5738e-06, 6.2453e-06, 2.1978e-01],
[9.0193e-06, 9.2154e-06, 2.1418e-01],
[1.4004e-05, 1.3209e-05, 2.1316e-01],
[7.9210e-06, 8.1767e-06, 2.1304e-01]]).cuda()
box_preds = torch.Tensor(
[[
5.6217e+01, 2.5908e+01, -1.3611e+00, 1.6025e+00, 3.6730e+00,
1.5129e+00, 1.1786e-01
],
[
5.4653e+01, 2.8885e+01, -1.9145e+00, 1.6362e+00, 4.0574e+00,
1.5599e+00, 1.7335e+00
],
[
5.5809e+01, 2.5686e+01, -1.4457e+00, 1.5939e+00, 3.8270e+00,
1.4997e+00, 2.9191e+00
],
[
5.6107e+01, 2.6082e+01, -1.3557e+00, 1.5782e+00, 3.7444e+00,
1.5266e+00, -1.7707e-01
],
[
3.1618e+01, -5.6004e+00, -1.2470e+00, 1.6459e+00, 4.1622e+00,
1.5632e+00, 1.5734e+00
],
[
3.1605e+01, -5.6342e+00, -1.2467e+00, 1.6474e+00, 4.1519e+00,
1.5481e+00, 1.6313e+00
],
[
5.6211e+01, 2.7294e+01, -1.5350e+00, 1.5422e+00, 3.7733e+00,
1.5140e+00, -9.5846e-02
],
[
5.5907e+01, 2.7155e+01, -1.4712e+00, 1.5416e+00, 3.7611e+00,
1.5142e+00, 5.2059e-02
],
[
5.4000e+01, 3.0585e+01, -1.6874e+00, 1.6495e+00, 4.0376e+00,
1.5554e+00, 1.7900e+00
],
[
5.6007e+01, 2.6300e+01, -1.3945e+00, 1.5716e+00, 3.7064e+00,
1.4715e+00, 2.9639e+00
]]).cuda()
input_meta = dict(
box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)
selected = self.multi_class_nms(box_probs, box_preds, 0.1, 0.001,
input_meta)
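    # multi_class_nms returns the indices of the boxes kept after class-wise
    # NMS with the score and NMS thresholds passed above.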
expected_selected = torch.Tensor([0, 1, 4, 8]).cuda()
assert torch.all(selected == expected_selected)

def test_make_sparse_convmodule():
    # each invalid config must be wrapped in its own pytest.raises block,
    # otherwise only the first assertion would ever be exercised
    with pytest.raises(AssertionError):
        # assert invalid order setting
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=('norm', 'act', 'conv', 'norm'))
    with pytest.raises(AssertionError):
        # assert invalid type of order
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=['norm', 'conv'])
    with pytest.raises(AssertionError):
        # assert invalid elements of order
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=('conv', 'normal', 'activate'))
sparse_convmodule = make_sparse_convmodule(
in_channels=4,
out_channels=64,
kernel_size=3,
padding=1,
indice_key='rcnn_part0',
norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01))
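    # The default order is ('conv', 'norm', 'act'), so the returned module is
    # SubMConv3d -> BatchNorm1d -> ReLU, as asserted below.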
assert isinstance(sparse_convmodule[0], SubMConv3d)
assert isinstance(sparse_convmodule[1], BatchNorm1d)
assert isinstance(sparse_convmodule[2], ReLU)
assert sparse_convmodule[1].num_features == 64
assert sparse_convmodule[1].eps == 0.001
assert sparse_convmodule[1].affine is True
assert sparse_convmodule[1].track_running_stats is True
assert sparse_convmodule[2].inplace is True
pre_act = make_sparse_convmodule(
in_channels=4,
out_channels=8,
kernel_size=3,
indice_key='rcnn_part1',
norm_cfg=dict(type='BN1d'),
order=('norm', 'act', 'conv'))
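    # With order=('norm', 'act', 'conv') the module is assembled in
    # pre-activation style: BatchNorm1d -> ReLU -> SubMConv3d.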
assert isinstance(pre_act[0], BatchNorm1d)
assert isinstance(pre_act[1], ReLU)
assert isinstance(pre_act[2], SubMConv3d)