Unverified Commit 333536f6 authored by Wenwei Zhang, committed by GitHub

Release v1.0.0rc1

parents 9c7270d0 f747daab
......@@ -2,190 +2,7 @@
import pytest
import torch
from mmdet3d.ops import PAConv, PAConvCUDA, assign_score_withk
def test_paconv_assign_scores():
if not torch.cuda.is_available():
pytest.skip()
scores = torch.tensor([[[[0.06947571, 0.6065746], [0.28462553, 0.8378516],
[0.7595994, 0.97220325], [0.519155, 0.766185]],
[[0.15348864, 0.6051019], [0.21510637, 0.31916398],
[0.00236845, 0.5842595], [0.6783676, 0.5216348]]],
[[[0.23089725, 0.5568468], [0.7405102, 0.06438422],
[0.6887394, 0.22089851], [0.0502342, 0.79228795]],
[[0.44883424, 0.15427643],
[0.13817799, 0.34856772], [0.7989621, 0.33788306],
[0.15699774, 0.7693662]]]]).float().cuda()
scores.requires_grad_()
points = torch.tensor([[[[0.06001121, 0.92963666, 0.5753327, 0.7251477],
[0.53563064, 0.23129565, 0.92366195, 0.44261628]],
[[0.5770022, 0.56625944, 0.23560429, 0.11178821],
[0.7735967, 0.95678777, 0.25468266, 0.02895975]],
[[0.0589869, 0.09017515, 0.5977862, 0.02797985],
[0.603862, 0.35991007, 0.85761684, 0.3096559]],
[[0.22359002, 0.13983732, 0.5544243, 0.68863827],
[0.85646236, 0.75651926, 0.8638947, 0.83600986]],
[[0.45424145, 0.27458847, 0.6456112, 0.47162914],
[0.15773582, 0.47645122, 0.79964715, 0.3323908]],
[[0.8351399, 0.84696376, 0.9431732, 0.29418713],
[0.77168906, 0.6996871, 0.19354361, 0.03392768]],
[[0.30976456, 0.7074133, 0.581795, 0.976677],
[0.69656056, 0.07199162, 0.4708506, 0.29117996]],
[[0.5829035, 0.30201727, 0.76556486, 0.0935446],
[0.88030535, 0.16129416, 0.9242525, 0.49545723]]],
[[[0.50899494, 0.06482804, 0.44939405, 0.37704808],
[0.47028124, 0.11969638, 0.62823206, 0.28560323]],
[[0.40690207, 0.689753, 0.51636654, 0.23040164],
[0.06935787, 0.00488842, 0.22462702, 0.09182382]],
[[0.26611632, 0.00184339, 0.7730655, 0.5228131],
[0.87776035, 0.77895886, 0.2787183, 0.16620636]],
[[0.502574, 0.04039001, 0.5368497, 0.98379374],
[0.40973026, 0.3238272, 0.9733018, 0.13988364]],
[[0.04586202, 0.20983845, 0.20662665, 0.22270602],
[0.60387236, 0.5155574, 0.51237285, 0.6528438]],
[[0.45735973, 0.86821306, 0.61054605, 0.8370336],
[0.45193362, 0.3734138, 0.7825672, 0.5699416]],
[[0.44591594, 0.12447512, 0.09282011, 0.7055254],
[0.25223452, 0.46696228, 0.7051136, 0.892151]],
[[0.49615085, 0.47321403, 0.93138885, 0.7652197],
[0.38766378, 0.30332977, 0.23131835,
0.02863514]]]]).float().cuda()
points.requires_grad_()
centers = torch.tensor([[[[0.83878064, 0.96658987, 0.8033424, 0.9598312],
[0.45035273, 0.8768925, 0.977736, 0.54547966]],
[[0.01041394, 0.597893, 0.36212963, 0.4410367],
[0.94879234, 0.8372817, 0.21237361, 0.67945415]],
[[0.5096087, 0.26401454, 0.60034937, 0.5417416],
[0.87591463, 0.546456, 0.4096033, 0.16373193]],
[[0.79547447, 0.1482386, 0.12840575, 0.45384115],
[0.5640288, 0.944541, 0.5745328, 0.73229736]],
[[0.93011934, 0.7406011, 0.62621707, 0.8677915],
[0.91563636, 0.3595413, 0.6678378, 0.6085383]],
[[0.22431666, 0.65617776, 0.7483924, 0.6263364],
[0.30968404, 0.78204364, 0.14899081,
0.09628749]],
[[0.73675203, 0.72104895, 0.4648038, 0.6101647],
[0.7817645, 0.16572917, 0.3311919, 0.43407398]],
[[0.8193154, 0.09559608, 0.05978829, 0.90262103],
[0.4256065, 0.8165596, 0.8206446, 0.6604721]]],
[[[0.7159653, 0.18600845, 0.21433902, 0.3159626],
[0.3921569, 0.33221376, 0.5061177, 0.7961841]],
[[0.95338356, 0.04785997, 0.67185795, 0.6538394],
[0.4729132, 0.33404195, 0.17750603, 0.8445621]],
[[0.6755793, 0.16193843, 0.75943846, 0.92123103],
[0.2781859, 0.03114432, 0.710638, 0.52729136]],
[[0.8376105, 0.10858494, 0.13208169, 0.365772],
[0.5930795, 0.27390373, 0.14036089, 0.170403]],
[[0.3479789, 0.89855295, 0.04844379, 0.9871029],
[0.29781651, 0.0244137, 0.9179047, 0.8081611]],
[[0.12460887, 0.44991326, 0.19382608, 0.35037738],
[0.2773472, 0.4362057, 0.36757517, 0.5993509]],
[[0.29630446, 0.90046406, 0.5417113, 0.13510644],
[0.09623539, 0.04226565, 0.32001644,
0.44358212]],
[[0.5274848, 0.82096446, 0.9415489, 0.7123748],
[0.7537517, 0.8086482, 0.85345286,
0.7472754]]]]).float().cuda()
centers.requires_grad_()
knn_idx = torch.tensor([[[6, 7, 4, 6], [2, 4, 2, 4]],
[[7, 1, 3, 2], [6, 0, 2, 6]]]).long().cuda()
aggregate = 'sum'
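# Shapes read off the tensors above: scores (B=2, npoint=2, K=4, M=2),
# points (B, N=8, M, out_dim=4), centers (B, N, M, out_dim) and
# knn_idx (B, npoint, K). assign_score_withk gathers the K support points of
# each sampled point via knn_idx, forms score-weighted (point - center)
# differences over the M weight banks and, with aggregate='sum', reduces
# them to an output of shape (B, out_dim, npoint, K).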
expected_output = torch.tensor(
[[[[-0.08134781, 0.03877336, -0.8212776, -0.2869547],
[-0.23378491, -0.24112664, -0.1600166, -0.4121864]],
[[-0.05780616, -0.12298299, -0.0370461, -0.07889931],
[-0.13956165, -0.02006848, -0.10940295, -0.0293439]],
[[0.09284145, 0.58250105, 0.5927749, 0.16774094],
[0.27070042, 0.13422406, 0.2617501, 0.23416464]],
[[-0.06121218, -0.09561322, -0.20408826, 0.08079343],
[0.00944228, 0.03874819, 0.08404065, 0.04041629]]],
[[[-0.2110898, -0.13335688, -0.09315082, 0.08512095],
[0.09121774, 0.15976946, 0.23994486, 0.14350912]],
[[-0.36167958, -0.14891288, -0.64470863, -0.0646704],
[-0.28276974, -0.08847666, -0.46904767, 0.20491874]],
[[-0.34877953, -0.35533834, -0.25225785, -0.4638189],
[-0.1420663, 0.09467781, 0.17088932, 0.22580585]],
[[-0.3879708, -0.3991068, 0.05276498, -0.46989647],
[0.32522714, -0.02163534, 0.21604237, 0.4346682]]]]).float()
# test forward
output = assign_score_withk(scores, points, centers, knn_idx, aggregate)
assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6)
# test backward
loss = output.sum()
loss.backward()
expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683],
[-0.78873926, 0.7485497],
[-0.6866992, 0.05346543],
[0.04288036, -0.18217683]],
[[-1.1407862, 0.13533896],
[-0.06964391, -0.22948086],
[-1.1407862, 0.13533896],
[-0.06964391, -0.22948086]]],
[[[-0.3363995, -2.212181],
[-1.1589496, -2.7724311],
[-0.9387654, -1.3163853],
[-1.4385346, -1.0614843]],
[[-0.5048497, 1.4143617],
[-0.47332114, 0.6017133],
[-0.30974793, 1.1995442],
[-0.5048497, 1.4143617]]]]).float()
expected_points_grad = torch.tensor(
[[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0.15585709, 0.15585709, 0.15585709, 0.15585709],
[1.1893613, 1.1893613, 1.1893613, 1.1893613]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[1.6530733, 1.6530733, 1.6530733, 1.6530733],
[1.8130021, 1.8130021, 1.8130021, 1.8130021]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0.58863074, 0.58863074, 0.58863074, 0.58863074],
[1.3727596, 1.3727596, 1.3727596, 1.3727596]],
[[0.28462553, 0.28462553, 0.28462553, 0.28462553],
[0.8378516, 0.8378516, 0.8378516, 0.8378516]]],
[[[0.13817799, 0.13817799, 0.13817799, 0.13817799],
[0.34856772, 0.34856772, 0.34856772, 0.34856772]],
[[0.7405102, 0.7405102, 0.7405102, 0.7405102],
[0.06438422, 0.06438422, 0.06438422, 0.06438422]],
[[0.8491963, 0.8491963, 0.8491963, 0.8491963],
[1.1301711, 1.1301711, 1.1301711, 1.1301711]],
[[0.6887394, 0.6887394, 0.6887394, 0.6887394],
[0.22089851, 0.22089851, 0.22089851, 0.22089851]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0.605832, 0.605832, 0.605832, 0.605832],
[0.92364264, 0.92364264, 0.92364264, 0.92364264]],
[[0.23089725, 0.23089725, 0.23089725, 0.23089725],
[0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float()
expected_centers_grad = torch.tensor(
[[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[-1.0493311, -1.0493311, -1.0493311, -1.0493311],
[-2.0301602, -2.0301602, -2.0301602, -2.0301602]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[-1.6328557, -1.6328557, -1.6328557, -1.6328557],
[-3.1828144, -3.1828144, -3.1828144, -3.1828144]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]]],
[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.]],
[[-1.5429721, -1.5429721, -1.5429721, -1.5429721],
[-1.6100934, -1.6100934, -1.6100934, -1.6100934]],
[[-1.7103812, -1.7103812, -1.7103812, -1.7103812],
[-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float()
assert torch.allclose(
scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6)
assert torch.allclose(
points.grad.detach().cpu(), expected_points_grad, atol=1e-6)
assert torch.allclose(
centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6)
from mmdet3d.ops import PAConv, PAConvCUDA
def test_paconv():
......
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmdet3d.ops import (ball_query, furthest_point_sample,
furthest_point_sample_with_dist, gather_points,
grouping_operation, knn, three_interpolate, three_nn)
def test_fps():
if not torch.cuda.is_available():
pytest.skip()
xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681],
[-0.8070, 2.4137,
-0.5845], [-1.0001, 2.1982, -0.5859],
[0.3841, 1.8983, -0.7431]],
[[-1.0696, 3.0758,
-0.1899], [-0.2559, 3.5521, -0.1402],
[0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205],
[-0.0518, 3.7251, -0.3950]]]).cuda()
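# furthest_point_sample greedily adds the point farthest from the points
# already chosen, seeded with index 0, which is why both expected index
# lists below start with 0.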
idx = furthest_point_sample(xyz, 3)
expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()
assert torch.all(idx == expected_idx)
def test_ball_query():
if not torch.cuda.is_available():
pytest.skip()
new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
[-2.2769, 2.7817, -0.2334],
[-0.4003, 2.4666, -0.5116],
[-0.0740, 1.3147, -1.3625],
[-0.0740, 1.3147, -1.3625]],
[[-2.0289, 2.4952, -0.1708],
[-2.0668, 6.0278, -0.4875],
[0.4066, 1.4211, -0.2947],
[-2.0289, 2.4952, -0.1708],
[-2.0289, 2.4952, -0.1708]]]).cuda()
xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],
[-0.4003, 2.4666,
-0.5116], [-0.5251, 2.4379, -0.8466],
[-0.9691, 1.1418,
-1.3733], [-0.2232, 0.9561, -1.3626],
[-2.2769, 2.7817, -0.2334],
[-0.2822, 1.3192, -1.3645], [0.1533, 1.5024, -1.0432],
[0.4917, 1.1529, -1.3496]],
[[-2.0289, 2.4952,
-0.1708], [-0.7188, 0.9956, -0.5096],
[-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],
[0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],
[-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],
[0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,
-1.2000]]]).cuda()
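# ball_query(min_radius, max_radius, sample_num, xyz, center_xyz) returns up
# to sample_num neighbour indices per query point inside the radius shell;
# when fewer neighbours exist, the first hit is repeated to pad the output,
# as visible in the expected indices below.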
idx = ball_query(0, 0.2, 5, xyz, new_xyz)
expected_idx = torch.tensor([[[0, 0, 0, 0, 0], [6, 6, 6, 6, 6],
[2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0], [2, 2, 2, 2, 2],
[7, 7, 7, 7, 7], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]]]).cuda()
assert torch.all(idx == expected_idx)
# test dilated ball query
idx = ball_query(0.2, 0.4, 5, xyz, new_xyz)
expected_idx = torch.tensor([[[0, 5, 7, 0, 0], [6, 6, 6, 6, 6],
[2, 3, 2, 2, 2], [0, 5, 7, 0, 0],
[0, 5, 7, 0, 0]],
[[0, 0, 0, 0, 0], [2, 2, 2, 2, 2],
[7, 7, 7, 7, 7], [0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]]]).cuda()
assert torch.all(idx == expected_idx)
def test_knn():
if not torch.cuda.is_available():
pytest.skip()
new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],
[-2.2769, 2.7817, -0.2334],
[-0.4003, 2.4666, -0.5116],
[-0.0740, 1.3147, -1.3625],
[-0.0740, 1.3147, -1.3625]],
[[-2.0289, 2.4952, -0.1708],
[-2.0668, 6.0278, -0.4875],
[0.4066, 1.4211, -0.2947],
[-2.0289, 2.4952, -0.1708],
[-2.0289, 2.4952, -0.1708]]]).cuda()
xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],
[-0.4003, 2.4666,
-0.5116], [-0.5251, 2.4379, -0.8466],
[-0.9691, 1.1418,
-1.3733], [-0.2232, 0.9561, -1.3626],
[-2.2769, 2.7817, -0.2334],
[-0.2822, 1.3192, -1.3645], [0.1533, 1.5024, -1.0432],
[0.4917, 1.1529, -1.3496]],
[[-2.0289, 2.4952,
-0.1708], [-0.7188, 0.9956, -0.5096],
[-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],
[0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],
[-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],
[0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,
-1.2000]]]).cuda()
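# knn(k, xyz, center_xyz) returns the indices of the k nearest points in xyz
# for every query as a (B, k, npoint) tensor; the brute-force topk below
# serves as the reference. A truthy fourth argument flags channel-first
# (B, 3, N) inputs, which is exercised further down.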
idx = knn(5, xyz, new_xyz)
new_xyz_ = new_xyz.unsqueeze(2).repeat(1, 1, xyz.shape[1], 1)
xyz_ = xyz.unsqueeze(1).repeat(1, new_xyz.shape[1], 1, 1)
dist = ((new_xyz_ - xyz_) * (new_xyz_ - xyz_)).sum(-1)
expected_idx = dist.topk(k=5, dim=2, largest=False)[1].transpose(2, 1)
assert torch.all(idx == expected_idx)
idx = knn(5,
xyz.transpose(1, 2).contiguous(),
new_xyz.transpose(1, 2).contiguous(), True)
assert torch.all(idx == expected_idx)
idx = knn(5, xyz, xyz)
xyz_ = xyz.unsqueeze(2).repeat(1, 1, xyz.shape[1], 1)
xyz__ = xyz.unsqueeze(1).repeat(1, xyz.shape[1], 1, 1)
dist = ((xyz_ - xyz__) * (xyz_ - xyz__)).sum(-1)
expected_idx = dist.topk(k=5, dim=2, largest=False)[1].transpose(2, 1)
assert torch.all(idx == expected_idx)
def test_grouping_points():
if not torch.cuda.is_available():
pytest.skip()
idx = torch.tensor([[[0, 0, 0], [3, 3, 3], [8, 8, 8], [0, 0, 0], [0, 0, 0],
[0, 0, 0]],
[[0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0], [0, 0, 0],
[0, 0, 0]]]).int().cuda()
features = torch.tensor([[
0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164, -1.8274,
0.9268, 0.8414
],
[
5.4247, 1.5113, 2.3944, 1.4740, 5.0300,
5.1030, 1.9360, 2.1939, 2.1581, 3.4666
],
[
-1.6266, -1.0281, -1.0393, -1.6931, -1.3982,
-0.5732, -1.0830, -1.7561, -1.6786, -1.6967
]],
[[
-0.0380, -0.1880, -1.5724, 0.6905, -0.3190,
0.7798, -0.3693, -0.9457, -0.2942, -1.8527
],
[
1.1773, 1.5009, 2.6399, 5.9242, 1.0962,
2.7346, 6.0865, 1.5555, 4.3303, 2.8229
],
[
-0.6646, -0.6870, -0.1125, -0.2224, -0.3445,
-1.4049, 0.4990, -0.7037, -0.9924, 0.0386
]]]).cuda()
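# grouping_operation gathers feature columns: features (B, C=3, N=10) indexed
# by idx (B, npoint=6, nsample=3) yields (B, C, npoint, nsample); each group
# here repeats a single column three times.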
output = grouping_operation(features, idx)
expected_output = torch.tensor([[[[0.5798, 0.5798, 0.5798],
[-1.3311, -1.3311, -1.3311],
[0.9268, 0.9268, 0.9268],
[0.5798, 0.5798, 0.5798],
[0.5798, 0.5798, 0.5798],
[0.5798, 0.5798, 0.5798]],
[[5.4247, 5.4247, 5.4247],
[1.4740, 1.4740, 1.4740],
[2.1581, 2.1581, 2.1581],
[5.4247, 5.4247, 5.4247],
[5.4247, 5.4247, 5.4247],
[5.4247, 5.4247, 5.4247]],
[[-1.6266, -1.6266, -1.6266],
[-1.6931, -1.6931, -1.6931],
[-1.6786, -1.6786, -1.6786],
[-1.6266, -1.6266, -1.6266],
[-1.6266, -1.6266, -1.6266],
[-1.6266, -1.6266, -1.6266]]],
[[[-0.0380, -0.0380, -0.0380],
[-0.3693, -0.3693, -0.3693],
[-1.8527, -1.8527, -1.8527],
[-0.0380, -0.0380, -0.0380],
[-0.0380, -0.0380, -0.0380],
[-0.0380, -0.0380, -0.0380]],
[[1.1773, 1.1773, 1.1773],
[6.0865, 6.0865, 6.0865],
[2.8229, 2.8229, 2.8229],
[1.1773, 1.1773, 1.1773],
[1.1773, 1.1773, 1.1773],
[1.1773, 1.1773, 1.1773]],
[[-0.6646, -0.6646, -0.6646],
[0.4990, 0.4990, 0.4990],
[0.0386, 0.0386, 0.0386],
[-0.6646, -0.6646, -0.6646],
[-0.6646, -0.6646, -0.6646],
[-0.6646, -0.6646, -0.6646]]]]).cuda()
assert torch.allclose(output, expected_output)
def test_gather_points():
if not torch.cuda.is_available():
pytest.skip()
features = torch.tensor([[[
-1.6095, -0.1029, -0.8876, -1.2447, -2.4031, 0.3708, -1.1586, -1.4967,
-0.4800, 0.2252
],
[
1.9138, 3.4979, 1.6854, 1.5631, 3.6776,
3.1154, 2.1705, 2.5221, 2.0411, 3.1446
],
[
-1.4173, 0.3073, -1.4339, -1.4340, -1.2770,
-0.2867, -1.4162, -1.4044, -1.4245, -1.4074
]],
[[
0.2160, 0.0842, 0.3661, -0.2749, -0.4909,
-0.6066, -0.8773, -0.0745, -0.9496, 0.1434
],
[
1.3644, 1.8087, 1.6855, 1.9563, 1.2746,
1.9662, 0.9566, 1.8778, 1.1437, 1.3639
],
[
-0.7172, 0.1692, 0.2241, 0.0721, -0.7540,
0.0462, -0.6227, 0.3223, -0.6944, -0.5294
]]]).cuda()
idx = torch.tensor([[0, 1, 4, 0, 0, 0], [0, 5, 6, 0, 0, 0]]).int().cuda()
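# gather_points selects columns of a (B, C, N) feature map by a (B, npoint)
# index tensor, producing (B, C, npoint); the half-precision path is checked
# at the end of this test.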
output = gather_points(features, idx)
expected_output = torch.tensor(
[[[-1.6095, -0.1029, -2.4031, -1.6095, -1.6095, -1.6095],
[1.9138, 3.4979, 3.6776, 1.9138, 1.9138, 1.9138],
[-1.4173, 0.3073, -1.2770, -1.4173, -1.4173, -1.4173]],
[[0.2160, -0.6066, -0.8773, 0.2160, 0.2160, 0.2160],
[1.3644, 1.9662, 0.9566, 1.3644, 1.3644, 1.3644],
[-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]]]).cuda()
assert torch.allclose(output, expected_output)
output_half = gather_points(features.half(), idx)
assert torch.allclose(output_half, expected_output.half())
def test_three_interpolate():
if not torch.cuda.is_available():
pytest.skip()
features = torch.tensor([[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
[3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
[2.6732, 2.8677, 2.6436, 2.6732, 2.6732, 2.6732],
[0.0124, 7.0150, 7.0199, 0.0124, 0.0124, 0.0124],
[0.3207, 0.0000, 0.3411, 0.3207, 0.3207,
0.3207]],
[[0.0000, 0.9544, 2.4532, 0.0000, 0.0000, 0.0000],
[0.5346, 1.9176, 1.4715, 0.5346, 0.5346, 0.5346],
[0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
[0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
[0.5814, 0.0103, 0.0000, 0.5814, 0.5814,
0.5814]]]).cuda()
idx = torch.tensor([[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2],
[0, 1, 3]],
[[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4],
[0, 1, 2]]]).int().cuda()
weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01],
[1.0000e+00, 5.8155e-08, 2.2373e-08],
[1.0000e+00, 1.7737e-08, 1.7356e-08],
[3.3333e-01, 3.3333e-01, 3.3333e-01],
[3.3333e-01, 3.3333e-01, 3.3333e-01],
[3.3333e-01, 3.3333e-01, 3.3333e-01]],
[[3.3333e-01, 3.3333e-01, 3.3333e-01],
[1.0000e+00, 1.3651e-08, 7.7312e-09],
[1.0000e+00, 1.7148e-08, 1.4070e-08],
[3.3333e-01, 3.3333e-01, 3.3333e-01],
[3.3333e-01, 3.3333e-01, 3.3333e-01],
[3.3333e-01, 3.3333e-01, 3.3333e-01]]]).cuda()
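# three_interpolate computes a weighted sum of three feature columns:
# output[b, c, n] = sum_j features[b, c, idx[b, n, j]] * weight[b, n, j].
# The weights above are either uniform (1/3 each) or effectively one-hot, so
# the one-hot rows simply copy a single feature column.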
output = three_interpolate(features, idx, weight)
expected_output = torch.tensor([[[
3.8953e+00, 4.4995e+00, 4.4995e+00, 3.8953e+00, 3.8953e+00, 3.2072e+00
], [
2.9320e+00, 3.0447e+00, 3.0447e+00, 2.9320e+00, 2.9320e+00, 2.9583e+00
], [
2.7281e+00, 2.6436e+00, 2.6436e+00, 2.7281e+00, 2.7281e+00, 2.7380e+00
], [
4.6824e+00, 7.0199e+00, 7.0199e+00, 4.6824e+00, 4.6824e+00, 2.3466e+00
], [
2.2060e-01, 3.4110e-01, 3.4110e-01, 2.2060e-01, 2.2060e-01, 2.1380e-01
]],
[[
8.1773e-01, 9.5440e-01, 2.4532e+00,
8.1773e-01, 8.1773e-01, 1.1359e+00
],
[
8.4689e-01, 1.9176e+00, 1.4715e+00,
8.4689e-01, 8.4689e-01, 1.3079e+00
],
[
6.9473e-01, 2.7440e-01, 2.0842e+00,
6.9473e-01, 6.9473e-01, 7.8619e-01
],
[
7.6789e-01, 1.5063e+00, 1.6209e+00,
7.6789e-01, 7.6789e-01, 1.1562e+00
],
[
3.8760e-01, 1.0300e-02, 8.3569e-09,
3.8760e-01, 3.8760e-01, 1.9723e-01
]]]).cuda()
assert torch.allclose(output, expected_output, 1e-4)
def test_three_nn():
if not torch.cuda.is_available():
pytest.skip()
known = torch.tensor([[[-1.8373, 3.5605,
-0.7867], [0.7615, 2.9420, 0.2314],
[-0.6503, 3.6637, -1.0622],
[-1.8373, 3.5605, -0.7867],
[-1.8373, 3.5605, -0.7867]],
[[-1.3399, 1.9991, -0.3698],
[-0.0799, 0.9698,
-0.8457], [0.0858, 2.4721, -0.1928],
[-1.3399, 1.9991, -0.3698],
[-1.3399, 1.9991, -0.3698]]]).cuda()
unknown = torch.tensor([[[-1.8373, 3.5605, -0.7867],
[0.7615, 2.9420, 0.2314],
[-0.6503, 3.6637, -1.0622],
[-1.5237, 2.3976, -0.8097],
[-0.0722, 3.4017, -0.2880],
[0.5198, 3.0661, -0.4605],
[-2.0185, 3.5019, -0.3236],
[0.5098, 3.1020, 0.5799],
[-1.6137, 3.8443, -0.5269],
[0.7341, 2.9626, -0.3189]],
[[-1.3399, 1.9991, -0.3698],
[-0.0799, 0.9698, -0.8457],
[0.0858, 2.4721, -0.1928],
[-0.9022, 1.6560, -1.3090],
[0.1156, 1.6901, -0.4366],
[-0.6477, 2.3576, -0.1563],
[-0.8482, 1.1466, -1.2704],
[-0.8753, 2.0845, -0.3460],
[-0.5621, 1.4233, -1.2858],
[-0.5883, 1.3114, -1.2899]]]).cuda()
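# three_nn returns, for every point in `unknown`, the Euclidean distances to
# and the indices of its three nearest points in `known`; queries that
# coincide with a known point yield a zero distance, and the duplicated rows
# in `known` show up as repeated index triples such as [0, 3, 4].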
dist, idx = three_nn(unknown, known)
expected_dist = torch.tensor([[[0.0000, 0.0000, 0.0000],
[0.0000, 2.0463, 2.8588],
[0.0000, 1.2229, 1.2229],
[1.2047, 1.2047, 1.2047],
[1.0011, 1.0845, 1.8411],
[0.7433, 1.4451, 2.4304],
[0.5007, 0.5007, 0.5007],
[0.4587, 2.0875, 2.7544],
[0.4450, 0.4450, 0.4450],
[0.5514, 1.7206, 2.6811]],
[[0.0000, 0.0000, 0.0000],
[0.0000, 1.6464, 1.6952],
[0.0000, 1.5125, 1.5125],
[1.0915, 1.0915, 1.0915],
[0.8197, 0.8511, 1.4894],
[0.7433, 0.8082, 0.8082],
[0.8955, 1.3340, 1.3340],
[0.4730, 0.4730, 0.4730],
[0.7949, 1.3325, 1.3325],
[0.7566, 1.3727, 1.3727]]]).cuda()
expected_idx = torch.tensor([[[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4],
[2, 1, 0], [1, 2, 0], [0, 3, 4], [1, 2, 0],
[0, 3, 4], [1, 2, 0]],
[[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4],
[2, 1, 0], [2, 0, 3], [1, 0, 3], [0, 3, 4],
[1, 0, 3], [1, 0, 3]]]).cuda()
assert torch.allclose(dist, expected_dist, 1e-4)
assert torch.all(idx == expected_idx)
def test_fps_with_dist():
if not torch.cuda.is_available():
pytest.skip()
xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681],
[-0.8070, 2.4137,
-0.5845], [-1.0001, 2.1982, -0.5859],
[0.3841, 1.8983, -0.7431]],
[[-1.0696, 3.0758,
-0.1899], [-0.2559, 3.5521, -0.1402],
[0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205],
[-0.0518, 3.7251, -0.3950]]]).cuda()
expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()
xyz_square_dist = ((xyz.unsqueeze(dim=1) -
xyz.unsqueeze(dim=2))**2).sum(-1)
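# furthest_point_sample_with_dist performs the same greedy selection as
# furthest_point_sample but consumes a precomputed (B, N, N) squared-distance
# matrix instead of coordinates, so it must reproduce the result above.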
idx = furthest_point_sample_with_dist(xyz_square_dist, 3)
assert torch.all(idx == expected_idx)
import numpy as np
fps_idx = np.load('tests/data/ops/fps_idx.npy')
features_for_fps_distance = np.load(
'tests/data/ops/features_for_fps_distance.npy')
expected_idx = torch.from_numpy(fps_idx).cuda()
features_for_fps_distance = torch.from_numpy(
features_for_fps_distance).cuda()
idx = furthest_point_sample_with_dist(features_for_fps_distance, 16)
assert torch.all(idx == expected_idx)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
points_in_boxes_cpu,
points_in_boxes_part)
def test_RoIAwarePool3d():
# RoIAwarePool3d only supports the GPU version currently.
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roiaware_pool3d_max = RoIAwarePool3d(
out_size=4, max_pts_per_voxel=128, mode='max')
roiaware_pool3d_avg = RoIAwarePool3d(
out_size=4, max_pts_per_voxel=128, mode='avg')
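# RoIAwarePool3d pools the features of the points falling inside each RoI
# onto an out_size^3 voxel grid (at most max_pts_per_voxel points per voxel,
# reduced by max or average), giving (num_rois, 4, 4, 4, C) outputs below.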
rois = torch.tensor(
[[1.0, 2.0, 3.0, 5.0, 4.0, 6.0, -0.3 - np.pi / 2],
[-10.0, 23.0, 16.0, 20.0, 10.0, 20.0, -0.5 - np.pi / 2]],
dtype=torch.float32).cuda(
) # boxes (m, 7) with bottom center in lidar coordinate
pts = torch.tensor(
[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
[0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
[4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
[-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],
dtype=torch.float32).cuda() # points (n, 3) in lidar coordinate
pts_feature = pts.clone()
pooled_features_max = roiaware_pool3d_max(
rois=rois, pts=pts, pts_feature=pts_feature)
assert pooled_features_max.shape == torch.Size([2, 4, 4, 4, 3])
assert torch.allclose(pooled_features_max.sum(),
torch.tensor(51.100).cuda(), 1e-3)
pooled_features_avg = roiaware_pool3d_avg(
rois=rois, pts=pts, pts_feature=pts_feature)
assert pooled_features_avg.shape == torch.Size([2, 4, 4, 4, 3])
assert torch.allclose(pooled_features_avg.sum(),
torch.tensor(49.750).cuda(), 1e-3)
def test_points_in_boxes_part():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
boxes = torch.tensor(
[[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3]],
[[-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],
dtype=torch.float32).cuda(
) # boxes (b, t, 7) with bottom center in lidar coordinate
pts = torch.tensor(
[[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
[0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
[4.7, 3.5, -12.2]],
[[3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],
[0, 0, 0], [6, 7, 8], [-2, -3, -4], [6, 4, 9]]],
dtype=torch.float32).cuda() # points (b, m, 3) in lidar coordinate
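# points_in_boxes_part reports, for every point, the index of a single box
# containing it (-1 if none), i.e. at most one box per point even when boxes
# overlap.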
point_indices = points_in_boxes_part(points=pts, boxes=boxes)
expected_point_indices = torch.tensor(
[[0, 0, 0, 0, 0, -1, -1, -1], [-1, -1, -1, -1, -1, -1, -1, -1]],
dtype=torch.int32).cuda()
assert point_indices.shape == torch.Size([2, 8])
assert (point_indices == expected_point_indices).all()
boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]],
dtype=torch.float32).cuda() # 30 degrees
pts = torch.tensor(
[[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0],
[-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]],
dtype=torch.float32).cuda()
point_indices = points_in_boxes_part(points=pts, boxes=boxes)
expected_point_indices = torch.tensor([[-1, -1, 0, -1, 0, -1, -1, -1]],
dtype=torch.int32).cuda()
assert (point_indices == expected_point_indices).all()
if torch.cuda.device_count() > 1:
pts = pts.to('cuda:1')
boxes = boxes.to('cuda:1')
expected_point_indices = expected_point_indices.to('cuda:1')
point_indices = points_in_boxes_part(points=pts, boxes=boxes)
assert point_indices.shape == torch.Size([1, 8])
assert (point_indices == expected_point_indices).all()
def test_points_in_boxes_cpu():
boxes = torch.tensor(
[[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
[-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],
dtype=torch.float32
) # boxes (m, 7) with bottom center in lidar coordinate
pts = torch.tensor(
[[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
[0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
[4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [
-16, -18, 9
], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]]],
dtype=torch.float32) # points (n, 3) in lidar coordinate
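# points_in_boxes_cpu returns a (batch, num_points, num_boxes) 0/1 mask
# marking, for every box, which points fall inside it.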
point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)
expected_point_indices = torch.tensor(
[[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
dtype=torch.int32)
assert point_indices.shape == torch.Size([1, 15, 2])
assert (point_indices == expected_point_indices).all()
boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]],
dtype=torch.float32) # 30 degrees
pts = torch.tensor(
[[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0],
[-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]],
dtype=torch.float32)
point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)
expected_point_indices = torch.tensor(
[[[0], [0], [1], [0], [1], [0], [0], [0]]], dtype=torch.int32)
assert (point_indices == expected_point_indices).all()
def test_points_in_boxes_all():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
boxes = torch.tensor(
[[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
[-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],
dtype=torch.float32).cuda(
) # boxes (m, 7) with bottom center in lidar coordinate
pts = torch.tensor(
[[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
[0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
[4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [
-16, -18, 9
], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]]],
dtype=torch.float32).cuda() # points (n, 3) in lidar coordinate
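# points_in_boxes_all is the GPU counterpart of points_in_boxes_cpu: it also
# returns a (batch, num_points, num_boxes) 0/1 membership mask rather than a
# single box index per point.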
point_indices = points_in_boxes_all(points=pts, boxes=boxes)
expected_point_indices = torch.tensor(
[[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
dtype=torch.int32).cuda()
assert point_indices.shape == torch.Size([1, 15, 2])
assert (point_indices == expected_point_indices).all()
if torch.cuda.device_count() > 1:
pts = pts.to('cuda:1')
boxes = boxes.to('cuda:1')
expected_point_indices = expected_point_indices.to('cuda:1')
point_indices = points_in_boxes_all(points=pts, boxes=boxes)
assert point_indices.shape == torch.Size([1, 15, 2])
assert (point_indices == expected_point_indices).all()
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmcv.ops import (SparseConv3d, SparseConvTensor, SparseInverseConv3d,
SubMConv3d)
from mmdet3d.ops import SparseBasicBlock
from mmdet3d.ops import spconv as spconv
def test_SparseUNet():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet
self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408])
self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408]).cuda()
# test encoder layers
assert len(self.encoder_layers) == 4
assert self.encoder_layers.encoder_layer1[0][0].in_channels == 16
assert self.encoder_layers.encoder_layer1[0][0].out_channels == 16
assert isinstance(self.encoder_layers.encoder_layer1[0][0],
spconv.conv.SubMConv3d)
assert isinstance(self.encoder_layers.encoder_layer1[0][0], SubMConv3d)
assert isinstance(self.encoder_layers.encoder_layer1[0][1],
torch.nn.modules.batchnorm.BatchNorm1d)
assert isinstance(self.encoder_layers.encoder_layer1[0][2],
torch.nn.modules.activation.ReLU)
assert self.encoder_layers.encoder_layer4[0][0].in_channels == 64
assert self.encoder_layers.encoder_layer4[0][0].out_channels == 64
assert isinstance(self.encoder_layers.encoder_layer4[0][0],
spconv.conv.SparseConv3d)
assert isinstance(self.encoder_layers.encoder_layer4[2][0],
spconv.conv.SubMConv3d)
assert isinstance(self.encoder_layers.encoder_layer4[0][0], SparseConv3d)
assert isinstance(self.encoder_layers.encoder_layer4[2][0], SubMConv3d)
# test decoder layers
assert isinstance(self.lateral_layer1, SparseBasicBlock)
assert isinstance(self.merge_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer2[0], spconv.conv.SparseInverseConv3d)
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
assert isinstance(self.merge_layer1[0], SubMConv3d)
assert isinstance(self.upsample_layer1[0], SubMConv3d)
assert isinstance(self.upsample_layer2[0], SparseInverseConv3d)
voxel_features = torch.tensor(
[[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32).cuda() # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z)
unet_ret_dict = self.forward(voxel_features, coordinates, 2)
seg_features = unet_ret_dict['seg_features']
......@@ -51,29 +53,32 @@ def test_SparseUNet():
def test_SparseBasicBlock():
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
voxel_features = torch.tensor(
[[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32).cuda() # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z)
# test
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
input_sp_tensor = SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
self = SparseBasicBlock(
4,
4,
conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)).cuda()
# test conv and bn layer
assert isinstance(self.conv1, spconv.conv.SubMConv3d)
assert isinstance(self.conv1, SubMConv3d)
assert self.conv1.in_channels == 4
assert self.conv1.out_channels == 4
assert isinstance(self.conv2, spconv.conv.SubMConv3d)
assert isinstance(self.conv2, SubMConv3d)
assert self.conv2.in_channels == 4
assert self.conv2.out_channels == 4
assert self.bn1.eps == 1e-3
......@@ -84,21 +89,24 @@ def test_SparseBasicBlock():
def test_make_sparse_convmodule():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
from mmdet3d.ops import make_sparse_convmodule
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
voxel_features = torch.tensor(
[[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32).cuda() # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z)
# test
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
input_sp_tensor = SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
sparse_block0 = make_sparse_convmodule(
4,
......@@ -109,8 +117,8 @@ def test_make_sparse_convmodule():
padding=0,
conv_type='SubMConv3d',
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
order=('conv', 'norm', 'act'))
assert isinstance(sparse_block0[0], spconv.SubMConv3d)
order=('conv', 'norm', 'act')).cuda()
assert isinstance(sparse_block0[0], SubMConv3d)
assert sparse_block0[0].in_channels == 4
assert sparse_block0[0].out_channels == 16
assert isinstance(sparse_block0[1], torch.nn.BatchNorm1d)
......@@ -134,4 +142,4 @@ def test_make_sparse_convmodule():
order=('norm', 'act', 'conv'))
assert isinstance(sparse_block1[0], torch.nn.BatchNorm1d)
assert isinstance(sparse_block1[1], torch.nn.ReLU)
assert isinstance(sparse_block1[2], spconv.SparseInverseConv3d)
assert isinstance(sparse_block1[2], SparseInverseConv3d)
......@@ -473,11 +473,11 @@ def test_imvoxelnet():
assert labels_3d.shape[0] >= 0
def test_pointrcnn():
def test_point_rcnn():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_cfg = _get_detector_cfg(
'pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
'point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_detector(pointrcnn_cfg).cuda()
points_0 = torch.rand([1000, 4], device='cuda')
points_1 = torch.rand([1000, 4], device='cuda')
......@@ -539,9 +539,8 @@ def test_smoke():
attr_labels = None
img_metas = [
dict(
cam_intrinsic=[[721.5377, 0., 609.5593, 0.],
[0., 721.5377, 172.854, 0.], [0., 0., 1., 0.],
[0., 0., 0., 1.]],
cam2img=[[721.5377, 0., 609.5593, 0.], [0., 721.5377, 172.854, 0.],
[0., 0., 1., 0.], [0., 0., 0., 1.]],
scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
pad_shape=[384, 1280],
trans_mat=np.array([[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
......
......@@ -52,7 +52,6 @@ def _get_head_cfg(fname):
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
......@@ -70,7 +69,6 @@ def _get_rpn_head_cfg(fname):
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
......@@ -88,7 +86,6 @@ def _get_roi_head_cfg(fname):
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
......@@ -106,7 +103,6 @@ def _get_pts_bbox_head_cfg(fname):
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg.pts))
......@@ -132,7 +128,7 @@ def _get_pointrcnn_rpn_head_cfg(fname):
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn.rpn_proposal
return rpn_head, train_cfg.rpn
def _get_vote_head_cfg(fname):
......@@ -141,7 +137,6 @@ def _get_vote_head_cfg(fname):
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
......@@ -290,11 +285,11 @@ def test_parta2_rpnhead_getboxes():
assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])
def test_pointrcnn_rpnhead_getboxes():
def test_point_rcnn_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_pointrcnn_rpn_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(rpn_head_cfg)
self.cuda()
......@@ -315,7 +310,7 @@ def test_pointrcnn_rpnhead_getboxes():
assert cls_preds.shape == (2, 1024, 3)
points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
result_list = self.get_bboxes(points, bbox_preds, cls_preds, input_metas)
max_num = proposal_cfg.max_num
max_num = proposal_cfg.nms_cfg.nms_post
bbox, score_selected, labels, cls_preds_selected = result_list[0]
assert bbox.tensor.shape == (max_num, 7)
assert score_selected.shape == (max_num, )
......@@ -515,22 +510,24 @@ def test_smoke_mono3d_head():
def test_parta2_bbox_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
parta2_bbox_head_cfg = _get_parta2_bbox_head_cfg(
'./parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')
self = build_head(parta2_bbox_head_cfg)
seg_feats = torch.rand([256, 14, 14, 14, 16])
part_feats = torch.rand([256, 14, 14, 14, 4])
self = build_head(parta2_bbox_head_cfg).cuda()
seg_feats = torch.rand([256, 14, 14, 14, 16]).cuda()
part_feats = torch.rand([256, 14, 14, 14, 4]).cuda()
cls_score, bbox_pred = self.forward(seg_feats, part_feats)
assert cls_score.shape == (256, 1)
assert bbox_pred.shape == (256, 7)
def test_pointrcnn_bbox_head():
def test_point_rcnn_bbox_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_bbox_head_cfg = _get_pointrcnn_bbox_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(pointrcnn_bbox_head_cfg).cuda()
feats = torch.rand([100, 512, 133]).cuda()
rcnn_cls, rcnn_reg = self.forward(feats)
......@@ -612,12 +609,12 @@ def test_part_aggregation_ROI_head():
assert labels_3d.shape == (12, )
def test_pointrcnn_roi_head():
def test_point_rcnn_roi_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roi_head_cfg = _get_roi_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
'./point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py')
self = build_head(roi_head_cfg).cuda()
......
......@@ -2,13 +2,13 @@
import pytest
import torch
from mmcv import Config
from mmcv.ops import SubMConv3d
from torch.nn import BatchNorm1d, ReLU
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
from mmdet3d.models import PartA2BboxHead
from mmdet3d.ops import make_sparse_convmodule
from mmdet3d.ops.spconv.conv import SubMConv3d
def test_loss():
......
......@@ -37,8 +37,7 @@ def test_single_roipoint_extractor():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roi_layer_cfg = dict(
type='RoIPointPool3d', num_sampled_points=512, pool_extra_width=0)
roi_layer_cfg = dict(type='RoIPointPool3d', num_sampled_points=512)
self = Single3DRoIPointExtractor(roi_layer=roi_layer_cfg)
......
......@@ -114,7 +114,7 @@ def test_dla_neck():
for i in range(len(in_channels))
]
outputs = neck(feats)
assert outputs.shape == (4, 64, 8, 8)
assert outputs[0].shape == (4, 64, 8, 8)
else:
# Test DLA Neck without DCNv2 on CPU
neck_cfg = dict(
......
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.autograd import gradcheck
from mmdet3d.ops import DynamicScatter
def test_dynamic_scatter():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
dsmean = DynamicScatter([0.32, 0.32, 6],
[-74.88, -74.88, -2, 74.88, 74.88, 4], True)
dsmax = DynamicScatter([0.32, 0.32, 6],
[-74.88, -74.88, -2, 74.88, 74.88, 4], False)
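# DynamicScatter(voxel_size, point_cloud_range, average_points) reduces the
# features of all points sharing a voxel into a single per-voxel feature:
# True averages the points, False takes the element-wise max. Points whose
# voxel coordinates are negative (out of range) contribute nothing, which
# the "empty reduced output" case below exercises.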
# test empty input
empty_feats = torch.empty(size=(0, 3), dtype=torch.float32, device='cuda')
empty_coors = torch.empty(size=(0, 3), dtype=torch.int32, device='cuda')
empty_feats.requires_grad_()
empty_feats_out_mean, empty_coors_out_mean = dsmean(
empty_feats, empty_coors)
empty_feats_out_mean.sum().backward()
empty_feats_out_max, empty_coors_out_max = dsmax(empty_feats, empty_coors)
empty_feats_out_max.sum().backward()
assert empty_feats_out_mean.shape == empty_feats.shape
assert empty_feats_out_max.shape == empty_feats.shape
assert empty_coors_out_mean.shape == empty_coors.shape
assert empty_coors_out_max.shape == empty_coors.shape
# test empty reduced output
empty_o_feats = torch.rand(
size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50
empty_o_coors = torch.randint(
low=-1, high=0, size=(200000, 3), dtype=torch.int32, device='cuda')
empty_o_feats.requires_grad_()
empty_o_feats_out_mean, empty_o_coors_out_mean = dsmean(
empty_o_feats, empty_o_coors)
empty_o_feats_out_mean.sum().backward()
assert (empty_o_feats.grad == 0).all()
empty_o_feats_out_max, empty_o_coors_out_max = dsmax(
empty_o_feats, empty_o_coors)
empty_o_feats_out_max.sum().backward()
assert (empty_o_feats.grad == 0).all()
# test non-empty input
feats = torch.rand(
size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50
coors = torch.randint(
low=-1, high=20, size=(200000, 3), dtype=torch.int32, device='cuda')
ref_voxel_coors = coors.unique(dim=0, sorted=True)
ref_voxel_coors = ref_voxel_coors[ref_voxel_coors.min(dim=-1).values >= 0]
ref_voxel_feats_mean = []
ref_voxel_feats_max = []
for ref_voxel_coor in ref_voxel_coors:
voxel_mask = (coors == ref_voxel_coor).all(dim=-1)
ref_voxel_feats_mean.append(feats[voxel_mask].mean(dim=0))
ref_voxel_feats_max.append(feats[voxel_mask].max(dim=0).values)
ref_voxel_feats_mean = torch.stack(ref_voxel_feats_mean)
ref_voxel_feats_max = torch.stack(ref_voxel_feats_max)
feats_out_mean, coors_out_mean = dsmean(feats, coors)
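# Valid output voxel coordinates lie in [0, 20), so c0 * 400 + c1 * 20 + c2
# is a unique base-20 key whose argsort reproduces the lexicographic order of
# coors.unique(dim=0, sorted=True) used for the reference above.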
seq_mean = (coors_out_mean[:, 0] * 400 + coors_out_mean[:, 1] * 20 +
coors_out_mean[:, 2]).argsort()
feats_out_mean = feats_out_mean[seq_mean]
coors_out_mean = coors_out_mean[seq_mean]
feats_out_max, coors_out_max = dsmax(feats, coors)
seq_max = (coors_out_max[:, 0] * 400 + coors_out_max[:, 1] * 20 +
coors_out_max[:, 2]).argsort()
feats_out_max = feats_out_max[seq_max]
coors_out_max = coors_out_max[seq_max]
assert (coors_out_mean == ref_voxel_coors).all()
assert torch.allclose(
feats_out_mean, ref_voxel_feats_mean, atol=1e-2, rtol=1e-5)
assert (coors_out_max == ref_voxel_coors).all()
assert torch.allclose(
feats_out_max, ref_voxel_feats_max, atol=1e-2, rtol=1e-5)
# test non-empty input without any point out of bound
feats = torch.rand(
size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50
coors = torch.randint(
low=0, high=20, size=(200000, 3), dtype=torch.int32, device='cuda')
ref_voxel_coors = coors.unique(dim=0, sorted=True)
ref_voxel_coors = ref_voxel_coors[ref_voxel_coors.min(dim=-1).values >= 0]
ref_voxel_feats_mean = []
ref_voxel_feats_max = []
for ref_voxel_coor in ref_voxel_coors:
voxel_mask = (coors == ref_voxel_coor).all(dim=-1)
ref_voxel_feats_mean.append(feats[voxel_mask].mean(dim=0))
ref_voxel_feats_max.append(feats[voxel_mask].max(dim=0).values)
ref_voxel_feats_mean = torch.stack(ref_voxel_feats_mean)
ref_voxel_feats_max = torch.stack(ref_voxel_feats_max)
feats_out_mean, coors_out_mean = dsmean(feats, coors)
seq_mean = (coors_out_mean[:, 0] * 400 + coors_out_mean[:, 1] * 20 +
coors_out_mean[:, 2]).argsort()
feats_out_mean = feats_out_mean[seq_mean]
coors_out_mean = coors_out_mean[seq_mean]
feats_out_max, coors_out_max = dsmax(feats, coors)
seq_max = (coors_out_max[:, 0] * 400 + coors_out_max[:, 1] * 20 +
coors_out_max[:, 2]).argsort()
feats_out_max = feats_out_max[seq_max]
coors_out_max = coors_out_max[seq_max]
assert (coors_out_mean == ref_voxel_coors).all()
assert torch.allclose(
feats_out_mean, ref_voxel_feats_mean, atol=1e-2, rtol=1e-5)
assert (coors_out_max == ref_voxel_coors).all()
assert torch.allclose(
feats_out_max, ref_voxel_feats_max, atol=1e-2, rtol=1e-5)
# test grad #
feats = torch.rand(
size=(100, 4), dtype=torch.float32, device='cuda') * 100 - 50
coors = torch.randint(
low=-1, high=3, size=(100, 3), dtype=torch.int32, device='cuda')
feats.requires_grad_()
gradcheck(dsmean, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)
gradcheck(dsmax, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmdet3d.core.voxel.voxel_generator import VoxelGenerator
from mmdet3d.datasets.pipelines import LoadPointsFromFile
from mmdet3d.ops.voxel.voxelize import Voxelization
def _get_voxel_points_indices(points, coors, voxel):
result_form = np.equal(coors, voxel)
return result_form[:, 0] & result_form[:, 1] & result_form[:, 2]
def test_voxelization():
voxel_size = [0.5, 0.5, 0.5]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
max_num_points = 1000
self = VoxelGenerator(voxel_size, point_cloud_range, max_num_points)
data_path = './tests/data/kitti/training/velodyne_reduced/000000.bin'
load_points_from_file = LoadPointsFromFile(
coord_type='LIDAR', load_dim=4, use_dim=4)
results = dict()
results['pts_filename'] = data_path
results = load_points_from_file(results)
points = results['points'].tensor.numpy()
voxels_generator = self.generate(points)
voxels, coors, num_points_per_voxel = voxels_generator
expected_voxels = voxels
expected_coors = coors
expected_num_points_per_voxel = num_points_per_voxel
points = torch.tensor(points)
max_num_points = -1
dynamic_voxelization = Voxelization(voxel_size, point_cloud_range,
max_num_points)
max_num_points = 1000
hard_voxelization = Voxelization(voxel_size, point_cloud_range,
max_num_points)
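# max_num_points = -1 puts Voxelization in dynamic mode, returning only the
# per-point voxel coordinates; a positive cap selects hard voxelization,
# which returns (voxels, coors, num_points_per_voxel).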
# test hard_voxelization on cpu
voxels, coors, num_points_per_voxel = hard_voxelization.forward(points)
coors = coors.detach().numpy()
voxels = voxels.detach().numpy()
num_points_per_voxel = num_points_per_voxel.detach().numpy()
assert np.all(coors == expected_coors)
assert np.all(voxels == expected_voxels)
assert np.all(num_points_per_voxel == expected_num_points_per_voxel)
# test dynamic_voxelization on cpu
coors = dynamic_voxelization.forward(points)
coors = coors.detach().numpy()
points = points.detach().numpy()
for i in range(expected_coors.shape[0]):
indices = _get_voxel_points_indices(points, coors, expected_coors[i])
num_points_current_voxel = points[indices].shape[0]
assert num_points_current_voxel > 0
assert np.all(
points[indices] == expected_voxels[i][:num_points_current_voxel])
assert num_points_current_voxel == expected_num_points_per_voxel[i]
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
# test hard_voxelization on gpu
points = torch.tensor(points).contiguous().to(device='cuda:0')
voxels, coors, num_points_per_voxel = hard_voxelization.forward(points)
coors = coors.cpu().detach().numpy()
voxels = voxels.cpu().detach().numpy()
num_points_per_voxel = num_points_per_voxel.cpu().detach().numpy()
assert np.all(coors == expected_coors)
assert np.all(voxels == expected_voxels)
assert np.all(num_points_per_voxel == expected_num_points_per_voxel)
# test dynamic_voxelization on gpu
coors = dynamic_voxelization.forward(points)
coors = coors.cpu().detach().numpy()
points = points.cpu().detach().numpy()
for i in range(expected_coors.shape[0]):
indices = _get_voxel_points_indices(points, coors, expected_coors[i])
num_points_current_voxel = points[indices].shape[0]
assert num_points_current_voxel > 0
assert np.all(
points[indices] == expected_voxels[i][:num_points_current_voxel])
assert num_points_current_voxel == expected_num_points_per_voxel[i]
def test_voxelization_nondeterministic():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
voxel_size = [0.5, 0.5, 0.5]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
data_path = './tests/data/kitti/training/velodyne_reduced/000000.bin'
load_points_from_file = LoadPointsFromFile(
coord_type='LIDAR', load_dim=4, use_dim=4)
results = dict()
results['pts_filename'] = data_path
results = load_points_from_file(results)
points = results['points'].tensor.numpy()
points = torch.tensor(points)
max_num_points = -1
dynamic_voxelization = Voxelization(voxel_size, point_cloud_range,
max_num_points)
max_num_points = 10
max_voxels = 50
hard_voxelization = Voxelization(
voxel_size,
point_cloud_range,
max_num_points,
max_voxels,
deterministic=False)
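# With deterministic=False the kernel trades reproducibility for speed: which
# points fill a voxel (and, once max_voxels is hit, which voxels survive) can
# vary between runs, so the checks below compare sets rather than tensors.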
# test hard_voxelization (non-deterministic version) on gpu
points = torch.tensor(points).contiguous().to(device='cuda:0')
voxels, coors, num_points_per_voxel = hard_voxelization.forward(points)
coors = coors.cpu().detach().numpy().tolist()
voxels = voxels.cpu().detach().numpy().tolist()
num_points_per_voxel = num_points_per_voxel.cpu().detach().numpy().tolist()
coors_all = dynamic_voxelization.forward(points)
coors_all = coors_all.cpu().detach().numpy().tolist()
coors_set = set([tuple(c) for c in coors])
coors_all_set = set([tuple(c) for c in coors_all])
assert len(coors_set) == len(coors)
assert len(coors_set - coors_all_set) == 0
points = points.cpu().detach().numpy().tolist()
coors_points_dict = {}
for c, ps in zip(coors_all, points):
if tuple(c) not in coors_points_dict:
coors_points_dict[tuple(c)] = set()
coors_points_dict[tuple(c)].add(tuple(ps))
for c, ps, n in zip(coors, voxels, num_points_per_voxel):
ideal_voxel_points_set = coors_points_dict[tuple(c)]
voxel_points_set = set([tuple(p) for p in ps[:n]])
assert len(voxel_points_set) == n
if n < max_num_points:
assert voxel_points_set == ideal_voxel_points_set
for p in ps[n:]:
assert max(p) == min(p) == 0
else:
assert len(voxel_points_set - ideal_voxel_points_set) == 0
# test hard_voxelization (non-deterministic version) on gpu
# with all input point in range
points = torch.tensor(points).contiguous().to(device='cuda:0')[:max_voxels]
coors_all = dynamic_voxelization.forward(points)
valid_mask = coors_all.ge(0).all(-1)
points = points[valid_mask]
coors_all = coors_all[valid_mask]
coors_all = coors_all.cpu().detach().numpy().tolist()
voxels, coors, num_points_per_voxel = hard_voxelization.forward(points)
coors = coors.cpu().detach().numpy().tolist()
coors_set = set([tuple(c) for c in coors])
coors_all_set = set([tuple(c) for c in coors_all])
assert len(coors_set) == len(coors) == len(coors_all_set)
......@@ -6,7 +6,8 @@ from tools.data_converter import indoor_converter as indoor
from tools.data_converter import kitti_converter as kitti
from tools.data_converter import lyft_converter as lyft_converter
from tools.data_converter import nuscenes_converter as nuscenes_converter
from tools.data_converter.create_gt_database import create_groundtruth_database
from tools.data_converter.create_gt_database import (
create_groundtruth_database, GTDatabaseCreater)
def kitti_data_prep(root_path,
......@@ -181,14 +182,16 @@ def waymo_data_prep(root_path,
converter.convert()
# Generate waymo infos
out_dir = osp.join(out_dir, 'kitti_format')
kitti.create_waymo_info_file(out_dir, info_prefix, max_sweeps=max_sweeps)
create_groundtruth_database(
kitti.create_waymo_info_file(
out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers)
GTDatabaseCreater(
'WaymoDataset',
out_dir,
info_prefix,
f'{out_dir}/{info_prefix}_infos_train.pkl',
relative_path=False,
with_mask=False)
with_mask=False,
num_worker=workers).create()
parser = argparse.ArgumentParser(description='Data converter arg parser')
......
......@@ -124,7 +124,7 @@ def create_groundtruth_database(dataset_class_name,
"""Given the raw data, generate the ground truth database.
Args:
dataset_class_name str): Name of the input dataset.
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str, optional): Path of the info file.
......@@ -207,7 +207,7 @@ def create_groundtruth_database(dataset_class_name,
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
use_dim=6,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
......@@ -337,3 +337,288 @@ def create_groundtruth_database(dataset_class_name,
with open(db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
class GTDatabaseCreater:
"""Given the raw data, generate the ground truth database. This is the
parallel version. For serialized version, please refer to
`create_groundtruth_database`
Args:
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str, optional): Path of the info file.
Default: None.
mask_anno_path (str, optional): Path of the mask_anno.
Default: None.
used_classes (list[str], optional): Classes that have been used.
Default: None.
database_save_path (str, optional): Path to save database.
Default: None.
db_info_save_path (str, optional): Path to save db_info.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
with_mask (bool, optional): Whether to use mask.
Default: False.
num_worker (int, optional): The number of parallel workers to use.
Default: 8.
"""
def __init__(self,
dataset_class_name,
data_path,
info_prefix,
info_path=None,
mask_anno_path=None,
used_classes=None,
database_save_path=None,
db_info_save_path=None,
relative_path=True,
add_rgb=False,
lidar_only=False,
bev_only=False,
coors_range=None,
with_mask=False,
num_worker=8) -> None:
self.dataset_class_name = dataset_class_name
self.data_path = data_path
self.info_prefix = info_prefix
self.info_path = info_path
self.mask_anno_path = mask_anno_path
self.used_classes = used_classes
self.database_save_path = database_save_path
self.db_info_save_path = db_info_save_path
self.relative_path = relative_path
self.add_rgb = add_rgb
self.lidar_only = lidar_only
self.bev_only = bev_only
self.coors_range = coors_range
self.with_mask = with_mask
self.num_worker = num_worker
self.pipeline = None
def create_single(self, input_dict):
group_counter = 0
single_db_infos = dict()
example = self.pipeline(input_dict)
annos = example['ann_info']
image_idx = example['sample_idx']
points = example['points'].tensor.numpy()
gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
names = annos['gt_names']
group_dict = dict()
if 'group_ids' in annos:
group_ids = annos['group_ids']
else:
group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
if 'difficulty' in annos:
difficulty = annos['difficulty']
num_obj = gt_boxes_3d.shape[0]
point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
if self.with_mask:
# prepare masks
gt_boxes = annos['gt_bboxes']
img_path = osp.split(example['img_info']['filename'])[-1]
if img_path not in self.file2id.keys():
print(f'skip image {img_path} for empty mask')
return single_db_infos
img_id = self.file2id[img_path]
kins_annIds = self.coco.getAnnIds(imgIds=img_id)
kins_raw_info = self.coco.loadAnns(kins_annIds)
kins_ann_info = _parse_coco_ann_info(kins_raw_info)
h, w = annos['img_shape'][:2]
gt_masks = [
_poly2mask(mask, h, w) for mask in kins_ann_info['masks']
]
# get mask inds based on iou mapping
bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
mask_inds = bbox_iou.argmax(axis=0)
valid_inds = (bbox_iou.max(axis=0) > 0.5)
# mask the image
# use more precise crop when it is ready
# object_img_patches = np.ascontiguousarray(
# np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
# crop image patches using roi_align
# object_img_patches = crop_image_patch_v2(
# torch.Tensor(gt_boxes),
# torch.Tensor(mask_inds).long(), object_img_patches)
object_img_patches, object_masks = crop_image_patch(
gt_boxes, gt_masks, mask_inds, annos['img'])
for i in range(num_obj):
filename = f'{image_idx}_{names[i]}_{i}.bin'
abs_filepath = osp.join(self.database_save_path, filename)
rel_filepath = osp.join(f'{self.info_prefix}_gt_database',
filename)
# save point clouds and image patches for each object
gt_points = points[point_indices[:, i]]
gt_points[:, :3] -= gt_boxes_3d[i, :3]
if self.with_mask:
if object_masks[i].sum() == 0 or not valid_inds[i]:
# Skip object for empty or invalid mask
continue
img_patch_path = abs_filepath + '.png'
mask_patch_path = abs_filepath + '.mask.png'
mmcv.imwrite(object_img_patches[i], img_patch_path)
mmcv.imwrite(object_masks[i], mask_patch_path)
with open(abs_filepath, 'w') as f:
gt_points.tofile(f)
if (self.used_classes is None) or names[i] in self.used_classes:
db_info = {
'name': names[i],
'path': rel_filepath,
'image_idx': image_idx,
'gt_idx': i,
'box3d_lidar': gt_boxes_3d[i],
'num_points_in_gt': gt_points.shape[0],
'difficulty': difficulty[i],
}
local_group_id = group_ids[i]
# if local_group_id >= 0:
if local_group_id not in group_dict:
group_dict[local_group_id] = group_counter
group_counter += 1
db_info['group_id'] = group_dict[local_group_id]
if 'score' in annos:
db_info['score'] = annos['score'][i]
if self.with_mask:
db_info.update({'box2d_camera': gt_boxes[i]})
if names[i] in single_db_infos:
single_db_infos[names[i]].append(db_info)
else:
single_db_infos[names[i]] = [db_info]
return single_db_infos
def create(self):
print(f'Create GT Database of {self.dataset_class_name}')
dataset_cfg = dict(
type=self.dataset_class_name,
data_root=self.data_path,
ann_file=self.info_path)
if self.dataset_class_name == 'KittiDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=self.with_mask,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
elif self.dataset_class_name == 'NuScenesDataset':
dataset_cfg.update(
use_valid_flag=True,
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True)
])
elif self.dataset_class_name == 'WaymoDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=6,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
dataset = build_dataset(dataset_cfg)
self.pipeline = dataset.pipeline
if self.database_save_path is None:
self.database_save_path = osp.join(
self.data_path, f'{self.info_prefix}_gt_database')
if self.db_info_save_path is None:
self.db_info_save_path = osp.join(
self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')
mmcv.mkdir_or_exist(self.database_save_path)
if self.with_mask:
self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))
imgIds = self.coco.getImgIds()
self.file2id = dict()
for i in imgIds:
info = self.coco.loadImgs([i])[0]
self.file2id.update({info['file_name']: i})
def loop_dataset(i):
input_dict = dataset.get_data_info(i)
dataset.pre_pipeline(input_dict)
return input_dict
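        # mmcv.track_parallel_progress accepts (iterable, task_num) when
        # tasks are produced lazily, as done here; each worker applies
        # create_single to one pre-processed input_dict.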
multi_db_infos = mmcv.track_parallel_progress(
self.create_single, ((loop_dataset(i)
for i in range(len(dataset))), len(dataset)),
self.num_worker)
        print('Make group ids globally unique')
group_counter_offset = 0
all_db_infos = dict()
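        # Each worker numbers group ids from zero within its own chunk, so
        # shift every chunk by the running count of groups merged so far to
        # keep group ids unique across the whole database.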
for single_db_infos in track_iter_progress(multi_db_infos):
group_id = -1
for name, name_db_infos in single_db_infos.items():
for db_info in name_db_infos:
group_id = max(group_id, db_info['group_id'])
db_info['group_id'] += group_counter_offset
if name not in all_db_infos:
all_db_infos[name] = []
all_db_infos[name].extend(name_db_infos)
group_counter_offset += (group_id + 1)
for k, v in all_db_infos.items():
            print(f'Loaded {len(v)} {k} database infos')
with open(self.db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
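
# A minimal usage sketch (argument values are placeholders, and the class
# name is an assumption since the enclosing class definition is not shown
# in this hunk):
#
#     creater = GTDatabaseCreater(
#         dataset_class_name='KittiDataset',
#         data_path='./data/kitti',
#         info_prefix='kitti',
#         info_path='./data/kitti/kitti_infos_train.pkl',
#         with_mask=False,
#         num_worker=8)
#     creater.create()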
......@@ -7,7 +7,7 @@ import numpy as np
from nuscenes.utils.geometry_utils import view_points
from mmdet3d.core.bbox import box_np_ops, points_cam2img
from .kitti_data_utils import get_kitti_image_info, get_waymo_image_info
from .kitti_data_utils import WaymoInfoGatherer, get_kitti_image_info
from .nuscenes_converter import post_process_coords
kitti_categories = ('Pedestrian', 'Cyclist', 'Car')
......@@ -44,6 +44,75 @@ def _read_imageset_file(path):
return [int(line) for line in lines]
class _NumPointsInGTCalculater:
"""Calculate the number of points inside the ground truth box. This is the
parallel version. For the serialized version, please refer to
`_calculate_num_points_in_gt`.
Args:
data_path (str): Path of the data.
relative_path (bool): Whether to use relative path.
remove_outside (bool, optional): Whether to remove points which are
outside of image. Default: True.
num_features (int, optional): Number of features per point.
Default: False.
num_worker (int, optional): the number of parallel workers to use.
Default: 8.
"""
def __init__(self,
data_path,
relative_path,
remove_outside=True,
num_features=4,
num_worker=8) -> None:
self.data_path = data_path
self.relative_path = relative_path
self.remove_outside = remove_outside
self.num_features = num_features
self.num_worker = num_worker
def calculate_single(self, info):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
if self.relative_path:
v_path = str(Path(self.data_path) / pc_info['velodyne_path'])
else:
v_path = pc_info['velodyne_path']
points_v = np.fromfile(
v_path, dtype=np.float32,
count=-1).reshape([-1, self.num_features])
rect = calib['R0_rect']
Trv2c = calib['Tr_velo_to_cam']
P2 = calib['P2']
if self.remove_outside:
points_v = box_np_ops.remove_outside_points(
points_v, rect, Trv2c, P2, image_info['image_shape'])
annos = info['annos']
num_obj = len([n for n in annos['name'] if n != 'DontCare'])
dims = annos['dimensions'][:num_obj]
loc = annos['location'][:num_obj]
rots = annos['rotation_y'][:num_obj]
gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1)
gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
gt_boxes_camera, rect, Trv2c)
indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
num_points_in_gt = indices.sum(0)
num_ignored = len(annos['dimensions']) - num_obj
num_points_in_gt = np.concatenate(
[num_points_in_gt, -np.ones([num_ignored])])
annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
return info
def calculate(self, infos):
ret_infos = mmcv.track_parallel_progress(self.calculate_single, infos,
self.num_worker)
for i, ret_info in enumerate(ret_infos):
infos[i] = ret_info
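
# Usage sketch (placeholder paths): instantiate once, then annotate a list
# of info dicts in place; each info gains annos['num_points_in_gt'].
#
#     calculater = _NumPointsInGTCalculater(
#         data_path='./data/kitti', relative_path=True,
#         remove_outside=True, num_features=4, num_worker=8)
#     calculater.calculate(kitti_infos)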
def _calculate_num_points_in_gt(data_path,
infos,
relative_path,
......@@ -161,7 +230,8 @@ def create_waymo_info_file(data_path,
pkl_prefix='waymo',
save_path=None,
relative_path=True,
max_sweeps=5):
max_sweeps=5,
workers=8):
"""Create info file of waymo dataset.
Given the raw data, generate its related info file in pkl format.
......@@ -187,55 +257,46 @@ def create_waymo_info_file(data_path,
save_path = Path(data_path)
else:
save_path = Path(save_path)
waymo_infos_train = get_waymo_image_info(
waymo_infos_gatherer_trainval = WaymoInfoGatherer(
data_path,
training=True,
velodyne=True,
calib=True,
pose=True,
image_ids=train_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
_calculate_num_points_in_gt(
data_path,
waymo_infos_train,
relative_path,
num_features=6,
remove_outside=False)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Waymo info train file is saved to {filename}')
mmcv.dump(waymo_infos_train, filename)
waymo_infos_val = get_waymo_image_info(
max_sweeps=max_sweeps,
num_worker=workers)
waymo_infos_gatherer_test = WaymoInfoGatherer(
data_path,
training=True,
training=False,
label_info=False,
velodyne=True,
calib=True,
pose=True,
image_ids=val_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
_calculate_num_points_in_gt(
max_sweeps=max_sweeps,
num_worker=workers)
num_points_in_gt_calculater = _NumPointsInGTCalculater(
data_path,
waymo_infos_val,
relative_path,
num_features=6,
remove_outside=False)
remove_outside=False,
num_worker=workers)
waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)
num_points_in_gt_calculater.calculate(waymo_infos_train)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Waymo info train file is saved to {filename}')
mmcv.dump(waymo_infos_train, filename)
waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)
num_points_in_gt_calculater.calculate(waymo_infos_val)
filename = save_path / f'{pkl_prefix}_infos_val.pkl'
print(f'Waymo info val file is saved to {filename}')
mmcv.dump(waymo_infos_val, filename)
filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
print(f'Waymo info trainval file is saved to {filename}')
mmcv.dump(waymo_infos_train + waymo_infos_val, filename)
waymo_infos_test = get_waymo_image_info(
data_path,
training=False,
label_info=False,
velodyne=True,
calib=True,
pose=True,
image_ids=test_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)
filename = save_path / f'{pkl_prefix}_infos_test.pkl'
print(f'Waymo info test file is saved to {filename}')
mmcv.dump(waymo_infos_test, filename)
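
# Invocation sketch (the data path is a placeholder; keyword defaults
# follow the signature above):
#
#     create_waymo_info_file(
#         './data/waymo/kitti_format',
#         pkl_prefix='waymo',
#         save_path=None,
#         relative_path=True,
#         max_sweeps=5,
#         workers=8)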
......
......@@ -6,6 +6,7 @@ from pathlib import Path
import mmcv
import numpy as np
from PIL import Image
from skimage import io
......@@ -102,6 +103,16 @@ def get_pose_path(idx,
relative_path, exist_check, use_prefix_id)
def get_timestamp_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'timestamp', '.txt', training,
relative_path, exist_check, use_prefix_id)
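
# By analogy with get_pose_path above, this resolves to a path like
# '<prefix>/<training|testing>/timestamp/<idx>.txt'; the exact layout and
# zero padding come from get_kitti_info_path, which is not shown here.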
def get_label_anno(label_path):
annotations = {}
annotations.update({
......@@ -283,19 +294,9 @@ def get_kitti_image_info(path,
return list(image_infos)
def get_waymo_image_info(path,
training=True,
label_info=True,
velodyne=False,
calib=False,
pose=False,
image_ids=7481,
extend_matrix=True,
num_worker=8,
relative_path=True,
with_imageshape=True,
max_sweeps=5):
class WaymoInfoGatherer:
"""
Parallel version of waymo dataset information gathering.
Waymo annotation format version like KITTI:
{
[optional]points: [N, 3+] point cloud
......@@ -323,54 +324,88 @@ def get_waymo_image_info(path,
}
}
"""
root_path = Path(path)
if not isinstance(image_ids, list):
image_ids = list(range(image_ids))
def map_func(idx):
def __init__(self,
path,
training=True,
label_info=True,
velodyne=False,
calib=False,
pose=False,
extend_matrix=True,
num_worker=8,
relative_path=True,
with_imageshape=True,
max_sweeps=5) -> None:
self.path = path
self.training = training
self.label_info = label_info
self.velodyne = velodyne
self.calib = calib
self.pose = pose
self.extend_matrix = extend_matrix
self.num_worker = num_worker
self.relative_path = relative_path
self.with_imageshape = with_imageshape
self.max_sweeps = max_sweeps
def gather_single(self, idx):
root_path = Path(self.path)
info = {}
pc_info = {'num_features': 6}
calib_info = {}
image_info = {'image_idx': idx}
annotations = None
if velodyne:
if self.velodyne:
pc_info['velodyne_path'] = get_velodyne_path(
idx, path, training, relative_path, use_prefix_id=True)
points = np.fromfile(
Path(path) / pc_info['velodyne_path'], dtype=np.float32)
points = np.copy(points).reshape(-1, pc_info['num_features'])
info['timestamp'] = np.int64(points[0, -1])
# values of the last dim are all the timestamp
idx,
self.path,
self.training,
self.relative_path,
use_prefix_id=True)
with open(
get_timestamp_path(
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)) as f:
info['timestamp'] = np.int64(f.read())
image_info['image_path'] = get_image_path(
idx,
path,
training,
relative_path,
self.path,
self.training,
self.relative_path,
info_type='image_0',
use_prefix_id=True)
if with_imageshape:
if self.with_imageshape:
img_path = image_info['image_path']
if relative_path:
if self.relative_path:
img_path = str(root_path / img_path)
image_info['image_shape'] = np.array(
io.imread(img_path).shape[:2], dtype=np.int32)
if label_info:
                # image IO with PIL is significantly faster than skimage
w, h = Image.open(img_path).size
image_info['image_shape'] = np.array((h, w), dtype=np.int32)
if self.label_info:
label_path = get_label_path(
idx,
path,
training,
relative_path,
self.path,
self.training,
self.relative_path,
info_type='label_all',
use_prefix_id=True)
if relative_path:
if self.relative_path:
label_path = str(root_path / label_path)
annotations = get_label_anno(label_path)
info['image'] = image_info
info['point_cloud'] = pc_info
if calib:
if self.calib:
calib_path = get_calib_path(
idx, path, training, relative_path=False, use_prefix_id=True)
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
with open(calib_path, 'r') as f:
lines = f.readlines()
P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
......@@ -383,7 +418,7 @@ def get_waymo_image_info(path,
]).reshape([3, 4])
P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
if self.extend_matrix:
P0 = _extend_matrix(P0)
P1 = _extend_matrix(P1)
P2 = _extend_matrix(P2)
......@@ -392,7 +427,7 @@ def get_waymo_image_info(path,
R0_rect = np.array([
float(info) for info in lines[5].split(' ')[1:10]
]).reshape([3, 3])
if extend_matrix:
if self.extend_matrix:
rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
rect_4x4[3, 3] = 1.
rect_4x4[:3, :3] = R0_rect
......@@ -402,7 +437,7 @@ def get_waymo_image_info(path,
Tr_velo_to_cam = np.array([
float(info) for info in lines[6].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
if self.extend_matrix:
Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
calib_info['P0'] = P0
calib_info['P1'] = P1
......@@ -412,9 +447,13 @@ def get_waymo_image_info(path,
calib_info['R0_rect'] = rect_4x4
calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
info['calib'] = calib_info
if pose:
if self.pose:
pose_path = get_pose_path(
idx, path, training, relative_path=False, use_prefix_id=True)
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
info['pose'] = np.loadtxt(pose_path)
if annotations is not None:
......@@ -424,28 +463,31 @@ def get_waymo_image_info(path,
sweeps = []
prev_idx = idx
while len(sweeps) < max_sweeps:
while len(sweeps) < self.max_sweeps:
prev_info = {}
prev_idx -= 1
prev_info['velodyne_path'] = get_velodyne_path(
prev_idx,
path,
training,
relative_path,
self.path,
self.training,
self.relative_path,
exist_check=False,
use_prefix_id=True)
if_prev_exists = osp.exists(
Path(path) / prev_info['velodyne_path'])
Path(self.path) / prev_info['velodyne_path'])
if if_prev_exists:
prev_points = np.fromfile(
Path(path) / prev_info['velodyne_path'], dtype=np.float32)
prev_points = np.copy(prev_points).reshape(
-1, pc_info['num_features'])
prev_info['timestamp'] = np.int64(prev_points[0, -1])
with open(
get_timestamp_path(
prev_idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)) as f:
prev_info['timestamp'] = np.int64(f.read())
prev_pose_path = get_pose_path(
prev_idx,
path,
training,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
prev_info['pose'] = np.loadtxt(prev_pose_path)
......@@ -456,10 +498,12 @@ def get_waymo_image_info(path,
return info
with futures.ThreadPoolExecutor(num_worker) as executor:
image_infos = executor.map(map_func, image_ids)
return list(image_infos)
def gather(self, image_ids):
if not isinstance(image_ids, list):
image_ids = list(range(image_ids))
image_infos = mmcv.track_parallel_progress(self.gather_single,
image_ids, self.num_worker)
return list(image_infos)
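
# Usage sketch (placeholder path), mirroring how create_waymo_info_file
# drives this class:
#
#     gatherer = WaymoInfoGatherer(
#         './data/waymo/kitti_format', training=True, velodyne=True,
#         calib=True, pose=True, relative_path=True, max_sweeps=5,
#         num_worker=8)
#     train_infos = gatherer.gather(train_img_ids)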
def kitti_anno_to_label_file(annos, folder):
......
......@@ -208,7 +208,7 @@ class S3DISSegData(object):
if mask.endswith('npy'):
mask = np.load(mask)
else:
mask = np.fromfile(mask, dtype=np.long)
mask = np.fromfile(mask, dtype=np.int64)
label = self.cat_id2class[mask]
return label
......
......@@ -138,9 +138,9 @@ class ScanNetData(object):
f'{sample_idx}_sem_label.npy')
pts_instance_mask = np.load(pts_instance_mask_path).astype(
np.long)
np.int64)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(
np.long)
np.int64)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
......@@ -260,7 +260,7 @@ class ScanNetSegData(object):
if mask.endswith('npy'):
mask = np.load(mask)
else:
mask = np.fromfile(mask, dtype=np.long)
mask = np.fromfile(mask, dtype=np.int64)
label = self.cat_id2class[mask]
return label
......
......@@ -87,6 +87,7 @@ class Waymo2KITTI(object):
self.calib_save_dir = f'{self.save_dir}/calib'
self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
self.pose_save_dir = f'{self.save_dir}/pose'
self.timestamp_save_dir = f'{self.save_dir}/timestamp'
self.create_folder()
......@@ -119,6 +120,7 @@ class Waymo2KITTI(object):
self.save_calib(frame, file_idx, frame_idx)
self.save_lidar(frame, file_idx, frame_idx)
self.save_pose(frame, file_idx, frame_idx)
self.save_timestamp(frame, file_idx, frame_idx)
if not self.test_mode:
self.save_label(frame, file_idx, frame_idx)
......@@ -210,7 +212,7 @@ class Waymo2KITTI(object):
parse_range_image_and_camera_projection(frame)
# First return
points_0, cp_points_0, intensity_0, elongation_0 = \
points_0, cp_points_0, intensity_0, elongation_0, mask_indices_0 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
......@@ -221,9 +223,10 @@ class Waymo2KITTI(object):
points_0 = np.concatenate(points_0, axis=0)
intensity_0 = np.concatenate(intensity_0, axis=0)
elongation_0 = np.concatenate(elongation_0, axis=0)
mask_indices_0 = np.concatenate(mask_indices_0, axis=0)
# Second return
points_1, cp_points_1, intensity_1, elongation_1 = \
points_1, cp_points_1, intensity_1, elongation_1, mask_indices_1 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
......@@ -234,15 +237,18 @@ class Waymo2KITTI(object):
points_1 = np.concatenate(points_1, axis=0)
intensity_1 = np.concatenate(intensity_1, axis=0)
elongation_1 = np.concatenate(elongation_1, axis=0)
mask_indices_1 = np.concatenate(mask_indices_1, axis=0)
points = np.concatenate([points_0, points_1], axis=0)
intensity = np.concatenate([intensity_0, intensity_1], axis=0)
elongation = np.concatenate([elongation_0, elongation_1], axis=0)
timestamp = frame.timestamp_micros * np.ones_like(intensity)
mask_indices = np.concatenate([mask_indices_0, mask_indices_1], axis=0)
# timestamp = frame.timestamp_micros * np.ones_like(intensity)
        # concatenate x, y, z, intensity, elongation, mask_indices (6-dim)
point_cloud = np.column_stack(
(points, intensity, elongation, timestamp))
(points, intensity, elongation, mask_indices))
pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
......@@ -367,18 +373,39 @@ class Waymo2KITTI(object):
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
pose)
def save_timestamp(self, frame, file_idx, frame_idx):
"""Save the timestamp data in a separate file instead of the
pointcloud.
Note that SDC's own pose is not included in the regular training
of KITTI dataset. KITTI raw dataset contains ego motion files
but are not often used. Pose is important for algorithms that
take advantage of the temporal information.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
with open(
join(f'{self.timestamp_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
'w') as f:
f.write(str(frame.timestamp_micros))
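        # Reading the value back mirrors WaymoInfoGatherer.gather_single
        # (timestamp_path stands for the file written above):
        #
        #     with open(timestamp_path) as f:
        #         timestamp = np.int64(f.read())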
def create_folder(self):
"""Create folder for data preprocessing."""
if not self.test_mode:
dir_list1 = [
self.label_all_save_dir, self.calib_save_dir,
self.point_cloud_save_dir, self.pose_save_dir
self.point_cloud_save_dir, self.pose_save_dir,
self.timestamp_save_dir
]
dir_list2 = [self.label_save_dir, self.image_save_dir]
else:
dir_list1 = [
self.calib_save_dir, self.point_cloud_save_dir,
self.pose_save_dir
self.pose_save_dir, self.timestamp_save_dir
]
dir_list2 = [self.image_save_dir]
for d in dir_list1:
......@@ -409,7 +436,9 @@ class Waymo2KITTI(object):
Returns:
tuple[list[np.ndarray]]: (List of points with shape [N, 3],
camera projections of points with shape [N, 6], intensity
with shape [N, 1], elongation with shape [N, 1]). All the
            with shape [N, 1], elongation with shape [N, 1], and each
            point's position in the depth map (element offset if the point
            comes from the main lidar, otherwise -1) with shape [N, 1]).
            All the
lists have the length of lidar numbers (5).
"""
calibrations = sorted(
......@@ -418,6 +447,7 @@ class Waymo2KITTI(object):
cp_points = []
intensity = []
elongation = []
mask_indices = []
frame_pose = tf.convert_to_tensor(
value=np.reshape(np.array(frame.pose.transform), [4, 4]))
......@@ -473,27 +503,36 @@ class Waymo2KITTI(object):
pixel_pose=pixel_pose_local,
frame_pose=frame_pose_local)
mask_index = tf.where(range_image_mask)
range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
points_tensor = tf.gather_nd(range_image_cartesian,
tf.compat.v1.where(range_image_mask))
points_tensor = tf.gather_nd(range_image_cartesian, mask_index)
cp = camera_projections[c.name][ri_index]
cp_tensor = tf.reshape(
tf.convert_to_tensor(value=cp.data), cp.shape.dims)
cp_points_tensor = tf.gather_nd(
cp_tensor, tf.compat.v1.where(range_image_mask))
cp_points_tensor = tf.gather_nd(cp_tensor, mask_index)
points.append(points_tensor.numpy())
cp_points.append(cp_points_tensor.numpy())
intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
tf.where(range_image_mask))
mask_index)
intensity.append(intensity_tensor.numpy())
elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
tf.where(range_image_mask))
mask_index)
elongation.append(elongation_tensor.numpy())
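                # For the main lidar (c.name == 1), flatten (return index,
                # row, col) into a single element offset within the stacked
                # range images; points from the other lidars are marked -1.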
if c.name == 1:
mask_index = (ri_index * range_image_mask.shape[0] +
mask_index[:, 0]
) * range_image_mask.shape[1] + mask_index[:, 1]
mask_index = mask_index.numpy().astype(elongation[-1].dtype)
else:
mask_index = np.full_like(elongation[-1], -1)
mask_indices.append(mask_index)
return points, cp_points, intensity, elongation
return points, cp_points, intensity, elongation, mask_indices
def cart_to_homo(self, mat):
"""Convert transformation matrix in Cartesian coordinates to
......
......@@ -9,6 +9,7 @@ from os import path as osp
import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
......@@ -35,6 +36,10 @@ def parse_args():
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')
parser.add_argument(
'--auto-resume',
action='store_true',
help='resume from the latest checkpoint automatically')
parser.add_argument(
'--no-validate',
action='store_true',
......@@ -58,6 +63,10 @@ def parse_args():
help='number of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument(
'--diff-seed',
action='store_true',
help='Whether or not set different seeds for different ranks')
parser.add_argument(
'--deterministic',
action='store_true',
......@@ -128,6 +137,14 @@ def main():
osp.splitext(osp.basename(args.config))[0])
if args.resume_from is not None:
cfg.resume_from = args.resume_from
if args.auto_resume:
cfg.auto_resume = args.auto_resume
        warnings.warn('`--auto-resume` is only supported when mmdet '
                      'version >= 2.20.0 for 3D detection model or '
                      'mmsegmentation version >= 0.21.0 for 3D '
                      'segmentation model')
if args.gpus is not None:
cfg.gpu_ids = range(1)
warnings.warn('`--gpus` is deprecated because we only support '
......@@ -191,6 +208,7 @@ def main():
# set random seeds
seed = init_random_seed(args.seed)
seed = seed + dist.get_rank() if args.diff_seed else seed
logger.info(f'Set random seed to {seed}, '
f'deterministic: {args.deterministic}')
set_random_seed(seed, deterministic=args.deterministic)
......