Merge branch 'indoor_boxstructure' into 'master'

Indoor boxstructure. See merge request open-mmlab/mmdet.3d!65

Merge branch 'indoor_boxstructure' into 'master'
Indoor boxstructure. See merge request open-mmlab/mmdet.3d!65
3a9012e4 · zhangwenwei · 9fcfd78f · b70ecdb5 · 3a9012e4 · 3a9012e4
Commit 3a9012e4 authored Jun 14, 2020 by zhangwenwei
9 changed files
--- a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py
+++ b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py
@@ -62,7 +62,7 @@ def points_in_boxes_batch(points, boxes):
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
-            (x, y, z) is the bottom center
+            (x, y, z) is the bottom center.

    Returns:
        box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0

--- a/tests/test_box3d.py
+++ b/tests/test_box3d.py
@@ -33,7 +33,7 @@ def test_lidar_boxes3d():
         ]],
        dtype=np.float32)
    bottom_center_box = LiDARInstance3DBoxes(
-        gravity_center_box, origin=[0.5, 0.5, 0.5])
+        gravity_center_box, origin=(0.5, 0.5, 0.5))
    expected_tensor = torch.tensor(
        [[
            -5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,

--- a/tests/test_dataset/test_indoor_eval.py
+++ b/tests/test_dataset/test_indoor_eval.py
@@ -5,58 +5,58 @@ from mmdet3d.core.evaluation.indoor_eval import average_precision, indoor_eval


 def test_indoor_eval():
+    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes, Box3DMode
    det_infos = [{
        'labels_3d':
-        torch.Tensor([4, 4, 3, 17, 2]),
+        torch.tensor([0, 1, 2, 2, 0, 3, 1, 2, 3, 2]),
        'boxes_3d':
-        torch.Tensor([[
-            2.8734498, -0.187645, -0.02600911, 0.6761766, 0.56542563,
-            0.5953976, 0.
-        ],
-                      [
-                          0.4031701, -3.2346897, 0.07118589, 0.73209894,
-                          0.8711227, 0.5148243, 0.
-                      ],
-                      [
-                          -1.274147, -2.351935, 0.07428858, 1.4534658,
-                          2.563081, 0.8587492, 0.
-                      ],
-                      [
-                          3.2214177, 0.7899204, 0.03836718, 0.05321002,
-                          1.2607929, 0.1411697, 0.
-                      ],
-                      [
-                          -1.6804854, 2.399011, -0.13099639, 0.5608963,
-                          0.5052759, 0.6770297, 0.
-                      ]]),
-        'scores_3d':
-        torch.Tensor([0.9980684, 0.9747082, 0.9709939, 0.9482147, 0.84311247])
-    }, {
-        'labels_3d':
-        torch.Tensor([17.0, 17.0, 3.0, 4.0, 17.0]),
-        'boxes_3d':
-        torch.Tensor([[
-            3.2112048e+00, 5.6918913e-01, -8.6143613e-04, 1.1942449e-01,
-            1.2988183e+00, 1.9952521e-01, 0.0000000e+00
-        ],
-                      [
-                          3.248133, 0.4324184, 0.20038621, 0.17225507,
-                          1.2736976, 0.32598814, 0.
-                      ],
-                      [
-                          -1.2793612, -2.3155289, 0.15598366, 1.2822601,
-                          2.2253945, 0.8361754, 0.
-                      ],
-                      [
-                          2.8716104, -0.26416883, -0.04933786, 0.8190681,
-                          0.60294986, 0.5769499, 0.
-                      ],
-                      [
-                          -2.2109854, 0.19445783, -0.01614259, 0.40659013,
-                          0.35370222, 0.3290567, 0.
-                      ]]),
+        DepthInstance3DBoxes(
+            torch.tensor([[
+                -2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01,
+                1.6506e+00, 0.0000e+00
+            ],
+                          [
+                              -3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01,
+                              9.6854e-01, 6.1755e-01, 0.0000e+00
+                          ],
+                          [
+                              -3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01,
+                              4.4708e-01, 6.2538e-01, 0.0000e+00
+                          ],
+                          [
+                              4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01,
+                              2.8679e-01, 1.6060e+00, 0.0000e+00
+                          ],
+                          [
+                              6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00,
+                              4.0162e-01, 1.7303e+00, 0.0000e+00
+                          ],
+                          [
+                              -5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01,
+                              7.5957e-01, 9.6930e-01, 0.0000e+00
+                          ],
+                          [
+                              2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01,
+                              4.1861e-01, 3.7339e-01, 0.0000e+00
+                          ],
+                          [
+                              -1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00,
+                              1.0566e+00, 1.3704e+00, 0.0000e+00
+                          ],
+                          [
+                              8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00,
+                              2.0772e-01, 9.6792e-01, 0.0000e+00
+                          ],
+                          [
+                              2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02,
+                              3.5713e-01, 4.5638e-01, 0.0000e+00
+                          ]]),
+            origin=(0.5, 0.5, 0)),
        'scores_3d':
-        torch.Tensor([0.9965866, 0.99507546, 0.9916463, 0.9702634, 0.95803124])
+        torch.tensor([
+            1.7516e-05, 1.0167e-06, 8.4486e-07, 7.1048e-02, 6.4274e-05,
+            1.5003e-07, 5.8102e-06, 1.9399e-08, 5.3126e-07, 1.8630e-09
+        ])
    }]

    label2cat = {
@@ -64,168 +64,67 @@ def test_indoor_eval():
        1: 'bed',
        2: 'chair',
        3: 'sofa',
-        4: 'table',
-        5: 'door',
-        6: 'window',
-        7: 'bookshelf',
-        8: 'picture',
-        9: 'counter',
-        10: 'desk',
-        11: 'curtain',
-        12: 'refrigerator',
-        13: 'showercurtrain',
-        14: 'toilet',
-        15: 'sink',
-        16: 'bathtub',
-        17: 'garbagebin'
    }
    gt_annos = [{
        'gt_num':
-        12,
+        10,
        'gt_boxes_upright_depth':
        np.array([[
-            2.54621506, -0.89397144, 0.54144311, 2.90430856, 1.78370309,
-            0.93826824
+            -2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01,
+            1.6506e+00, 0.0000e+00
        ],
                  [
-                      3.36553669, 0.31014189, 0.38758934, 1.2504847,
-                      0.71281439, 0.3908577
+                      -3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01,
+                      9.6854e-01, 6.1755e-01, 0.0000e+00
                  ],
                  [
-                      0.17272574, 2.90289116, 0.27966365, 0.56292468,
-                      0.8512187, 0.4987641
+                      -3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01,
+                      4.4708e-01, 6.2538e-01, 0.0000e+00
                  ],
                  [
-                      2.39521956, 1.67557895, 0.40407273, 1.23511314,
-                      0.49469376, 0.62720448
+                      4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01,
+                      2.8679e-01, 1.6060e+00, 0.0000e+00
                  ],
                  [
-                      -2.41815996, -1.69104958, 0.22304082, 0.55816364,
-                      0.48154473, 0.66580439
+                      6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00,
+                      4.0162e-01, 1.7303e+00, 0.0000e+00
                  ],
                  [
-                      -0.18044823, 2.9227581, 0.24480903, 0.36165208,
-                      0.44468427, 0.53103662
+                      -5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01,
+                      7.5957e-01, 9.6930e-01, 0.0000e+00
                  ],
                  [
-                      -2.44398379, -2.1610918, 0.23631772, 0.52229881,
-                      0.63388562, 0.66596919
+                      2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01,
+                      4.1861e-01, 3.7339e-01, 0.0000e+00
                  ],
                  [
-                      -2.01452827, -2.9558928, 0.8139953, 1.61732554,
-                      0.60224247, 1.79295814
+                      -1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00,
+                      1.0566e+00, 1.3704e+00, 0.0000e+00
                  ],
                  [
-                      -0.61519569, 3.24365234, 1.24335742, 2.11988783,
-                      0.26006722, 1.77748263
+                      8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00,
+                      2.0772e-01, 9.6792e-01, 0.0000e+00
                  ],
                  [
-                      -2.64330673, 0.59929442, 1.59422684, 0.07352924,
-                      0.28620502, 0.35408139
-                  ],
-                  [
-                      -0.58128822, 3.23699641, 0.06050609, 1.94151425,
-                      0.16413498, 0.20168215
-                  ],
-                  [
-                      0.15343043, 2.24693251, 0.22470728, 0.49632657,
-                      0.47379827, 0.43063563
+                      2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02,
+                      3.5713e-01, 4.5638e-01, 0.0000e+00
                  ]]),
        'class':
-        np.array([3, 4, 4, 17, 2, 2, 2, 7, 11, 8, 17, 2])
-    }, {
-        'gt_num':
-        12,
-        'gt_boxes_upright_depth':
-        np.array([[
-            3.48649406, 0.24238291, 0.48358256, 1.34014034, 0.72744983,
-            0.40819243
-        ],
-                  [
-                      -0.50371504, 3.25293231, 1.25988698, 2.12330937,
-                      0.27563906, 1.80230701
-                  ],
-                  [
-                      2.58820581, -0.99452347, 0.57732373, 2.94801593,
-                      1.67463434, 0.88743341
-                  ],
-                  [
-                      -1.9116497, -2.88811016, 0.70502496, 1.62386703,
-                      0.60732293, 1.5857985
-                  ],
-                  [
-                      -2.55324745, 0.6909315, 1.59045517, 0.07264495,
-                      0.32018459, 0.3506999
-                  ],
-                  [
-                      -2.3436017, -2.1659112, 0.254318, 0.5333302, 0.56154585,
-                      0.64904487
-                  ],
-                  [
-                      -2.32046795, -1.6880455, 0.26138437, 0.5586133,
-                      0.59743834, 0.6378752
-                  ],
-                  [
-                      -0.46495372, 3.22126102, 0.03188983, 1.92557108,
-                      0.15160203, 0.24680007
-                  ],
-                  [
-                      0.28087699, 2.88433838, 0.2495866, 0.57001019,
-                      0.85177159, 0.5689255
-                  ],
-                  [
-                      -0.05292395, 2.90586925, 0.23064148, 0.39113954,
-                      0.43746281, 0.52981442
-                  ],
-                  [
-                      0.25537968, 2.25156307, 0.24932587, 0.48192862,
-                      0.51398182, 0.38040417
-                  ],
-                  [
-                      2.60432816, 1.62303996, 0.42025632, 1.23775268,
-                      0.51761389, 0.66034317
-                  ]]),
-        'class':
-        np.array([4, 11, 3, 7, 8, 2, 2, 17, 4, 2, 2, 17])
+        np.array([0, 1, 2, 0, 0, 3, 1, 3, 3, 2])
    }]

-    ret_value = indoor_eval(gt_annos, det_infos, [0.25, 0.5], label2cat)
-    garbagebin_AP_25 = ret_value['garbagebin_AP_0.25']
-    sofa_AP_25 = ret_value['sofa_AP_0.25']
-    table_AP_25 = ret_value['table_AP_0.25']
-    chair_AP_25 = ret_value['chair_AP_0.25']
-    mAP_25 = ret_value['mAP_0.25']
-    garbagebin_rec_25 = ret_value['garbagebin_rec_0.25']
-    sofa_rec_25 = ret_value['sofa_rec_0.25']
-    table_rec_25 = ret_value['table_rec_0.25']
-    chair_rec_25 = ret_value['chair_rec_0.25']
-    mAR_25 = ret_value['mAR_0.25']
-    sofa_AP_50 = ret_value['sofa_AP_0.50']
-    table_AP_50 = ret_value['table_AP_0.50']
-    chair_AP_50 = ret_value['chair_AP_0.50']
-    mAP_50 = ret_value['mAP_0.50']
-    sofa_rec_50 = ret_value['sofa_rec_0.50']
-    table_rec_50 = ret_value['table_rec_0.50']
-    chair_rec_50 = ret_value['chair_rec_0.50']
-    mAR_50 = ret_value['mAR_0.50']
-    assert garbagebin_AP_25 == 0.25
-    assert sofa_AP_25 == 1.0
-    assert table_AP_25 == 0.75
-    assert chair_AP_25 == 0.125
-    assert abs(mAP_25 - 0.303571) < 0.001
-    assert garbagebin_rec_25 == 0.25
-    assert sofa_rec_25 == 1.0
-    assert table_rec_25 == 0.75
-    assert chair_rec_25 == 0.125
-    assert abs(mAR_25 - 0.303571) < 0.001
-    assert sofa_AP_50 == 0.25
-    assert abs(table_AP_50 - 0.416667) < 0.001
-    assert chair_AP_50 == 0.125
-    assert abs(mAP_50 - 0.113095) < 0.001
-    assert sofa_rec_50 == 0.5
-    assert table_rec_50 == 0.5
-    assert chair_rec_50 == 0.125
-    assert abs(mAR_50 - 0.160714) < 0.001
+    ret_value = indoor_eval(
+        gt_annos,
+        det_infos, [0.25, 0.5],
+        label2cat,
+        box_type_3d=DepthInstance3DBoxes,
+        box_mode_3d=Box3DMode.DEPTH)
+
+    assert abs(ret_value['cabinet_AP_0.25'] - 0.666667) < 1e-3
+    assert abs(ret_value['bed_AP_0.25'] - 1.0) < 1e-3
+    assert abs(ret_value['chair_AP_0.25'] - 0.5) < 1e-3
+    assert abs(ret_value['mAP_0.25'] - 0.708333) < 1e-3
+    assert abs(ret_value['mAR_0.25'] - 0.833333) < 1e-3


 def test_average_precision():

--- a/tests/test_dataset/test_scannet_dataset.py
+++ b/tests/test_dataset/test_scannet_dataset.py
@@ -67,13 +67,12 @@ def test_getitem():
         [-1.332374, 1.474838, -0.04405887, -0.00887359],
         [2.1336637, -1.3265059, -0.02880373, 0.00638155],
         [0.43895668, -3.0259454, 1.5560012, 1.5911865]])
-    expected_gt_bboxes_3d = np.array([
-        [-1.5005362, -3.512584, 1.8565295, 1.7457027, 0.24149807, 0.57235193],
-        [-2.8848705, 3.4961755, 1.5268247, 0.66170084, 0.17433672, 0.67153597],
-        [-1.1585636, -2.192365, 0.61649567, 0.5557011, 2.5375574, 1.2144762],
-        [-2.930457, -2.4856408, 0.9722377, 0.6270478, 1.8461524, 0.28697443],
-        [3.3114715, -0.00476722, 1.0712197, 0.46191898, 3.8605113, 2.1603441]
-    ])
+    expected_gt_bboxes_3d = torch.tensor(
+        [[-1.5005, -3.5126, 1.5704, 1.7457, 0.2415, 0.5724, 0.0000],
+         [-2.8849, 3.4962, 1.1911, 0.6617, 0.1743, 0.6715, 0.0000],
+         [-1.1586, -2.1924, 0.0093, 0.5557, 2.5376, 1.2145, 0.0000],
+         [-2.9305, -2.4856, 0.8288, 0.6270, 1.8462, 0.2870, 0.0000],
+         [3.3115, -0.0048, -0.0090, 0.4619, 3.8605, 2.1603, 0.0000]])
    expected_gt_labels = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
        0, 0, 0, 5, 5, 5
@@ -84,8 +83,8 @@ def test_getitem():

    assert scannet_dataset.CLASSES == class_names
    assert np.allclose(points, expected_points)
-    assert gt_bboxes_3d[:5].shape == (5, 6)
-    assert np.allclose(gt_bboxes_3d[:5], expected_gt_bboxes_3d)
+    assert gt_bboxes_3d.tensor[:5].shape == (5, 7)
+    assert torch.allclose(gt_bboxes_3d.tensor[:5], expected_gt_bboxes_3d, 1e-2)
    assert np.all(gt_labels.numpy() == expected_gt_labels)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
@@ -114,42 +113,51 @@ def test_getitem():


 def test_evaluate():
+    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
    root_path = './tests/data/scannet'
    ann_file = './tests/data/scannet/scannet_infos.pkl'
    scannet_dataset = ScanNetDataset(root_path, ann_file)
    results = []
    pred_boxes = dict()
-    pred_boxes['boxes_3d'] = torch.Tensor(
-        [[
-            3.52074146e+00, -1.48129511e+00, 1.57035351e+00, 2.31956959e-01,
-            1.74445975e+00, 5.72351933e-01, 0
+    pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
+        torch.tensor([[
+            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,
+            5.7235e-01, 0.0000e+00
        ],
-         [
-             -3.48033905e+00, -2.90395617e+00, 1.19105673e+00, 1.70723915e-01,
-             6.60776615e-01, 6.71535969e-01, 0
-         ],
-         [
-             2.19867110e+00, -1.14655101e+00, 9.25755501e-03, 2.53463078e+00,
-             5.41841269e-01, 1.21447623e+00, 0
-         ],
-         [
-             2.50163722, -2.91681337, 0.82875049, 1.84280431, 0.61697435,
-             0.28697443, 0
-         ],
-         [
-             -0.01335114, 3.3114481, -0.00895238, 3.85815716, 0.44081616,
-             2.16034412, 0
-         ]])
-    pred_boxes['labels_3d'] = torch.Tensor([6, 6, 4, 9, 11])
-    pred_boxes['scores_3d'] = torch.Tensor([0.5, 1.0, 1.0, 1.0, 1.0])
+                      [
+                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,
+                          1.7072e-01, 6.7154e-01, 0.0000e+00
+                      ],
+                      [
+                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,
+                          2.5346e+00, 1.2145e+00, 0.0000e+00
+                      ],
+                      [
+                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,
+                          1.8428e+00, 2.8697e-01, 0.0000e+00
+                      ],
+                      [
+                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,
+                          3.8582e+00, 2.1603e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,
+                          2.1603e-01, 1.2767e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,
+                          6.8676e-01, 1.0586e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,
+                          1.4927e+00, 2.3380e+00, 0.0000e+00
+                      ]]))
+    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])
+    pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])
    results.append(pred_boxes)
    metric = [0.25, 0.5]
    ret_dict = scannet_dataset.evaluate(results, metric)
-    table_average_precision_25 = ret_dict['table_AP_0.25']
-    window_average_precision_25 = ret_dict['window_AP_0.25']
-    counter_average_precision_25 = ret_dict['counter_AP_0.25']
-    curtain_average_precision_25 = ret_dict['curtain_AP_0.25']
-    assert abs(table_average_precision_25 - 0.3333) < 0.01
-    assert abs(window_average_precision_25 - 1) < 0.01
-    assert abs(counter_average_precision_25 - 1) < 0.01
-    assert abs(curtain_average_precision_25 - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
--- a/tests/test_dataset/test_sunrgbd_dataset.py
+++ b/tests/test_dataset/test_sunrgbd_dataset.py
@@ -57,25 +57,15 @@ def test_getitem():
                                [0.6464, 1.5635, 0.0826, 0.0616],
                                [0.6453, 1.5603, 0.0849, 0.0638],
                                [0.6488, 1.5786, 0.0461, 0.0251]])
-    expected_gt_bboxes_3d = np.array([[
-        -2.012483, 3.9473376, -0.25446942, 2.3730404, 1.9457763, 2.0303352,
-        1.2205974
-    ],
-                                      [
-                                          -3.7036808, 4.2396426, -0.81091917,
-                                          0.6032123, 0.91040343, 1.003341,
-                                          1.2662518
-                                      ],
-                                      [
-                                          0.6528646, 2.1638472, -0.15228128,
-                                          0.7347852, 1.6113238, 2.1694272,
-                                          2.81404
-                                      ]])
+    expected_gt_bboxes_3d = torch.tensor(
+        [[-2.0125, 3.9473, -1.2696, 2.3730, 1.9458, 2.0303, 1.2206],
+         [-3.7037, 4.2396, -1.3126, 0.6032, 0.9104, 1.0033, 1.2663],
+         [0.6529, 2.1638, -1.2370, 0.7348, 1.6113, 2.1694, 2.8140]])
    expected_gt_labels = np.array([0, 7, 6])
    original_classes = sunrgbd_dataset.CLASSES

    assert np.allclose(points, expected_points, 1e-2)
-    assert np.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)
+    assert np.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
    assert original_classes == class_names

@@ -101,23 +91,19 @@ def test_getitem():


 def test_evaluate():
+    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
    root_path = './tests/data/sunrgbd'
    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)
    results = []
    pred_boxes = dict()
-    pred_boxes['boxes_3d'] = torch.Tensor(
-        [[
-            4.168696, -1.047307, -1.231666, 1.887584, 2.30207, 1.969614,
-            1.69564944
-        ],
-         [
-             4.811675, -2.583086, -1.273334, 0.883176, 0.585172, 0.973334,
-             1.64999513
-         ], [1.904545, 1.086364, -1.2, 1.563134, 0.71281, 2.104546,
-             0.1022069]])
-    pred_boxes['labels_3d'] = torch.Tensor([0, 7, 6])
-    pred_boxes['scores_3d'] = torch.Tensor([0.5, 1.0, 1.0])
+    pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
+        torch.tensor(
+            [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],
+             [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],
+             [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]]))
+    pred_boxes['labels_3d'] = torch.tensor([0, 7, 6])
+    pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0])
    results.append(pred_boxes)
    metric = [0.25, 0.5]
    ap_dict = sunrgbd_dataset.evaluate(results, metric)

--- a/tests/test_pipeline/test_indoor_augment.py
+++ b/tests/test_pipeline/test_indoor_augment.py
 import numpy as np
+import torch

+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet3d.datasets.pipelines import IndoorFlipData, IndoorGlobalRotScale


@@ -10,15 +12,15 @@ def test_indoor_flip_data():
    sunrgbd_results['points'] = np.array(
        [[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
         [-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
-    sunrgbd_results['gt_bboxes_3d'] = np.array([[
-        0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 3.07028526
-    ],
-                                                [
-                                                    -0.449953, 1.395455,
-                                                    -1.027778, 1.500956,
-                                                    1.637298, 0.636364,
-                                                    -1.58242359
-                                                ]])
+    sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        np.array([[
+            0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
+            3.07028526
+        ],
+                  [
+                      -0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
+                      0.636364, -1.58242359
+                  ]]))
    sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
    sunrgbd_points = sunrgbd_results['points']
    sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
@@ -26,13 +28,12 @@ def test_indoor_flip_data():
    expected_sunrgbd_points = np.array(
        [[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
         [0.39597902, 1.05465031, -0.74920434, 0.673096]])
-    expected_sunrgbd_gt_bboxes_3d = np.array([[
-        -0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 0.07130739
-    ], [
-        0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364, 4.72401624
-    ]])
+    expected_sunrgbd_gt_bboxes_3d = torch.tensor(
+        [[-0.2137, 1.0364, -0.9823, 0.6154, 0.5726, 0.8727, 0.0713],
+         [0.4500, 1.3955, -1.0278, 1.5010, 1.6373, 0.6364, 4.7240]])
    assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
-    assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
+    assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
+                          expected_sunrgbd_gt_bboxes_3d, 1e-3)

    np.random.seed(0)
    scannet_indoor_flip_data = IndoorFlipData(1, 1)
@@ -40,11 +41,17 @@ def test_indoor_flip_data():
    scannet_results['points'] = np.array(
        [[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
         [1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
-    scannet_results['gt_bboxes_3d'] = np.array([[
-        0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
-    ], [
-        -0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
-    ]])
+    scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        np.array([[
+            0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
+            0.5163464
+        ],
+                  [
+                      -0.03226406, 1.70392646, 0.60348618, 0.65165804,
+                      0.72084366, 0.64667457
+                  ]]),
+        box_dim=6,
+        with_yaw=False)
    scannet_results = scannet_indoor_flip_data(scannet_results)
    scannet_points = scannet_results['points']
    scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
@@ -52,13 +59,12 @@ def test_indoor_flip_data():
    expected_scannet_points = np.array(
        [[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
         [-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
-    expected_scannet_gt_bboxes_3d = np.array([[
-        -0.55903838, -0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
-    ], [
-        0.03226406, -1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
-    ]])
+    expected_scannet_gt_bboxes_3d = torch.tensor(
+        [[-0.5590, -0.4820, 0.6569, 0.6537, 0.6003, 0.5163, 0.0000],
+         [0.0323, -1.7039, 0.6035, 0.6517, 0.7208, 0.6467, 0.0000]])
    assert np.allclose(scannet_points, expected_scannet_points)
-    assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
+    assert torch.allclose(scannet_gt_bboxes_3d.tensor,
+                          expected_scannet_gt_bboxes_3d, 1e-2)


 def test_global_rot_scale():
@@ -69,15 +75,15 @@ def test_global_rot_scale():
    sunrgbd_results['points'] = np.array(
        [[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
         [-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
-    sunrgbd_results['gt_bboxes_3d'] = np.array([[
-        0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 3.07028526
-    ],
-                                                [
-                                                    -0.449953, 1.395455,
-                                                    -1.027778, 1.500956,
-                                                    1.637298, 0.636364,
-                                                    -1.58242359
-                                                ]])
+    sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        np.array([[
+            0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
+            3.07028526
+        ],
+                  [
+                      -0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
+                      0.636364, -1.58242359
+                  ]]))

    sunrgbd_results = sunrgbd_augment(sunrgbd_results)
    sunrgbd_points = sunrgbd_results['points']
@@ -86,18 +92,12 @@ def test_global_rot_scale():
    expected_sunrgbd_points = np.array(
        [[0.89427376, 3.94489646, 0.21003141, 1.72415094],
         [-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
-    expected_sunrgbd_gt_bboxes_3d = np.array([[
-        0.17080999, 1.11345031, -1.04573864, 0.65513891, 0.60953755,
-        0.92906854, 3.01916788
-    ],
-                                              [
-                                                  -0.55427876, 1.45912611,
-                                                  -1.09412807, 1.59785293,
-                                                  1.74299674, 0.67744563,
-                                                  -1.63354097
-                                              ]])
+    expected_sunrgbd_gt_bboxes_3d = torch.tensor(
+        [[0.1708, 1.1135, -1.0457, 0.6551, 0.6095, 0.9291, 3.0192],
+         [-0.5543, 1.4591, -1.0941, 1.5979, 1.7430, 0.6774, -1.6335]])
    assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
-    assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
+    assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
+                          expected_sunrgbd_gt_bboxes_3d, 1e-3)

    np.random.seed(0)
    scannet_augment = IndoorGlobalRotScale(
@@ -106,11 +106,17 @@ def test_global_rot_scale():
    scannet_results['points'] = np.array(
        [[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
         [1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
-    scannet_results['gt_bboxes_3d'] = np.array([[
-        0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
-    ], [
-        -0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
-    ]])
+    scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        np.array([[
+            0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
+            0.5163464
+        ],
+                  [
+                      -0.03226406, 1.70392646, 0.60348618, 0.65165804,
+                      0.72084366, 0.64667457
+                  ]]),
+        box_dim=6,
+        with_yaw=False)
    scannet_results = scannet_augment(scannet_results)
    scannet_points = scannet_results['points']
    scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
@@ -118,10 +124,9 @@ def test_global_rot_scale():
    expected_scannet_points = np.array(
        [[1.61240576, -0.15530836, 0.5811581, 0.5989725],
         [1.39417555, 0.43225122, 0.38729519, 0.40510958]])
-    expected_scannet_gt_bboxes_3d = np.array([[
-        0.55491157, 0.48676213, 0.65688646, 0.65879754, 0.60584609, 0.5163464
-    ], [
-        -0.04677942, 1.70358975, 0.60348618, 0.65777559, 0.72636927, 0.64667457
-    ]])
+    expected_scannet_gt_bboxes_3d = torch.tensor(
+        [[0.5549, 0.4868, 0.6569, 0.6588, 0.6058, 0.5163, 0.0000],
+         [-0.0468, 1.7036, 0.6035, 0.6578, 0.7264, 0.6467, 0.0000]])
    assert np.allclose(scannet_points, expected_scannet_points)
-    assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
+    assert torch.allclose(scannet_gt_bboxes_3d.tensor,
+                          expected_scannet_gt_bboxes_3d, 1e-3)
--- a/tests/test_pipeline/test_indoor_pipeline.py
+++ b/tests/test_pipeline/test_indoor_pipeline.py
@@ -2,7 +2,9 @@ import os.path as osp

 import mmcv
 import numpy as np
+import torch

+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet3d.datasets.pipelines import Compose


@@ -57,7 +59,8 @@ def test_scannet_pipeline():
        data_path, info['pts_instance_mask_path'])
    results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, info['pts_semantic_mask_path'])
-    results['ann_info']['gt_bboxes_3d'] = scannet_gt_bboxes_3d
+    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d

    results['bbox3d_fields'] = []
@@ -77,13 +80,12 @@ def test_scannet_pipeline():
         [-1.332374, 1.474838, -0.04405887, -0.00887359],
         [2.1336637, -1.3265059, -0.02880373, 0.00638155],
         [0.43895668, -3.0259454, 1.5560012, 1.5911865]])
-    expected_gt_bboxes_3d = np.array([
-        [-1.5005362, -3.512584, 1.8565295, 1.7457027, 0.24149807, 0.57235193],
-        [-2.8848705, 3.4961755, 1.5268247, 0.66170084, 0.17433672, 0.67153597],
-        [-1.1585636, -2.192365, 0.61649567, 0.5557011, 2.5375574, 1.2144762],
-        [-2.930457, -2.4856408, 0.9722377, 0.6270478, 1.8461524, 0.28697443],
-        [3.3114715, -0.00476722, 1.0712197, 0.46191898, 3.8605113, 2.1603441]
-    ])
+    expected_gt_bboxes_3d = torch.tensor(
+        [[-1.5005, -3.5126, 1.8565, 1.7457, 0.2415, 0.5724, 0.0000],
+         [-2.8849, 3.4962, 1.5268, 0.6617, 0.1743, 0.6715, 0.0000],
+         [-1.1586, -2.1924, 0.6165, 0.5557, 2.5376, 1.2145, 0.0000],
+         [-2.9305, -2.4856, 0.9722, 0.6270, 1.8462, 0.2870, 0.0000],
+         [3.3115, -0.0048, 1.0712, 0.4619, 3.8605, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
        0, 0, 0, 5, 5, 5
@@ -91,7 +93,8 @@ def test_scannet_pipeline():
    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
    assert np.allclose(points, expected_points)
-    assert np.allclose(gt_bboxes_3d[:5, :], expected_gt_bboxes_3d)
+    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
+                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)
    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)
@@ -130,12 +133,12 @@ def test_sunrgbd_pipeline():
            np.float32)
        gt_labels_3d = info['annos']['class'].astype(np.long)
    else:
-        gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
+        gt_bboxes_3d = np.zeros((1, 7), dtype=np.float32)
        gt_labels_3d = np.zeros((1, ), dtype=np.long)

    # prepare input of pipeline
    results['ann_info'] = dict()
-    results['ann_info']['gt_bboxes_3d'] = gt_bboxes_3d
+    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
    results['ann_info']['gt_labels_3d'] = gt_labels_3d
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
@@ -150,21 +153,11 @@ def test_sunrgbd_pipeline():
                                [0.6464, 1.5635, 0.0826, 0.0616],
                                [0.6453, 1.5603, 0.0849, 0.0638],
                                [0.6488, 1.5786, 0.0461, 0.0251]])
-    expected_gt_bboxes_3d = np.array([[
-        -2.012483, 3.9473376, -0.25446942, 2.3730404, 1.9457763, 2.0303352,
-        1.2205974
-    ],
-                                      [
-                                          -3.7036808, 4.2396426, -0.81091917,
-                                          0.6032123, 0.91040343, 1.003341,
-                                          1.2662518
-                                      ],
-                                      [
-                                          0.6528646, 2.1638472, -0.15228128,
-                                          0.7347852, 1.6113238, 2.1694272,
-                                          2.81404
-                                      ]])
+    expected_gt_bboxes_3d = torch.tensor(
+        [[-2.0125, 3.9473, -0.2545, 2.3730, 1.9458, 2.0303, 1.2206],
+         [-3.7037, 4.2396, -0.8109, 0.6032, 0.9104, 1.0033, 1.2663],
+         [0.6529, 2.1638, -0.1523, 0.7348, 1.6113, 2.1694, 2.8140]])
    expected_gt_labels_3d = np.array([0, 7, 6])
-    assert np.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)
+    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
    assert np.allclose(points, expected_points, 1e-2)
--- a/tests/test_pipeline/test_loading.py
+++ b/tests/test_pipeline/test_loading.py
@@ -4,6 +4,7 @@ import mmcv
 import numpy as np
 import pytest

+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet3d.datasets.pipelines import LoadAnnotations3D, LoadPointsFromFile


@@ -79,7 +80,8 @@ def test_load_annotations3D():
        data_path, scannet_info['pts_instance_mask_path'])
    scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(
        data_path, scannet_info['pts_semantic_mask_path'])
-    scannet_results['ann_info']['gt_bboxes_3d'] = scannet_gt_bboxes_3d
+    scannet_results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
+        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    scannet_results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d

    scannet_results['bbox3d_fields'] = []
@@ -92,7 +94,7 @@ def test_load_annotations3D():

    scannet_pts_instance_mask = scannet_results['pts_instance_mask']
    scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
-    assert scannet_gt_boxes.shape == (27, 6)
+    assert scannet_gt_boxes.tensor.shape == (27, 7)
    assert scannet_gt_lbaels.shape == (27, )
    assert scannet_pts_instance_mask.shape == (100, )
    assert scannet_pts_semantic_mask.shape == (100, )
--- a/tools/test.py
+++ b/tools/test.py
@@ -9,7 +9,7 @@ from tools.fuse_conv_bn import fuse_module

 from mmdet3d.datasets import build_dataloader, build_dataset
 from mmdet3d.models import build_detector
-from mmdet.apis import multi_gpu_test, single_gpu_test
+from mmdet.apis import multi_gpu_test, set_random_seed, single_gpu_test
 from mmdet.core import wrap_fp16_model


@@ -76,6 +76,11 @@ def parse_args():
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu_collect is not specified')
+    parser.add_argument('--seed', type=int, default=0, help='random seed')
+    parser.add_argument(
+        '--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options', nargs='+', action=MultipleKVAction, help='custom options')
    parser.add_argument(
@@ -108,6 +113,7 @@ def main():
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
+
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

@@ -118,6 +124,10 @@ def main():
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

+    # set random seeds
+    if args.seed is not None:
+        set_random_seed(args.seed, deterministic=args.deterministic)
+
    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)