OpenDAS / mmdetection3d / Commits

Commit 3b7ec493, authored Jun 10, 2020 by zhangwenwei
Parent: 40006a6a

    Use box3d structure

Showing 11 changed files with 313 additions and 67 deletions (+313, -67)
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py            +14 -11
mmdet3d/core/bbox/structures/base_box3d.py                        +31  -7
mmdet3d/core/bbox/structures/cam_box3d.py                          +3  -3
mmdet3d/core/bbox/structures/depth_box3d.py                        +3  -3
mmdet3d/core/bbox/structures/lidar_box3d.py                       +33  -3
mmdet3d/datasets/nuscenes_dataset.py                               +8  -5
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py    +28 -17
mmdet3d/models/roi_heads/part_aggregation_roi_head.py              +5  -5
tests/test_box3d.py                                               +49  -3
tests/test_pipeline/test_outdoor_pipeline.py                     +122  -1
tests/test_semantic_heads.py                                      +17  -9
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py

@@ -151,6 +151,9 @@ data = dict(
     samples_per_gpu=6,
     workers_per_gpu=4,
     train=dict(
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
         type=dataset_type,
         data_root=data_root,
         ann_file=data_root + 'kitti_infos_train.pkl',
@@ -159,7 +162,7 @@ data = dict(
         pipeline=train_pipeline,
         modality=input_modality,
         classes=class_names,
-        test_mode=False),
+        test_mode=False)),
     val=dict(
         type=dataset_type,
         data_root=data_root,
@@ -197,7 +200,7 @@ momentum_config = dict(
     step_ratio_up=0.4,
 )
 checkpoint_config = dict(interval=1)
-evaluation = dict(interval=2)
+evaluation = dict(interval=1)
 # yapf:disable
 log_config = dict(
     interval=50,
@@ -207,7 +210,7 @@ log_config = dict(
     ])
 # yapf:enable
 # runtime settings
-total_epochs = 80
+total_epochs = 40
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/sec_secfpn_80e'
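Note on the config change above (an observation, not part of the diff): RepeatDataset with times=2 makes one training epoch traverse KITTI twice, so halving total_epochs from 80 to 40 keeps the total number of dataset passes at 80, and moving evaluation from every 2 epochs to every epoch keeps the same cadence measured in dataset passes. A quick check:

# Back-of-the-envelope check; the numbers come straight from the diff.
passes_before = 80 * 1  # total_epochs=80, dataset seen once per epoch
passes_after = 40 * 2   # total_epochs=40, RepeatDataset times=2
assert passes_before == passes_after == 80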
mmdet3d/core/bbox/structures/base_box3d.py

@@ -10,15 +10,24 @@ from .utils import limit_period, xywhr2xyxyr
 class BaseInstance3DBoxes(object):
     """Base class for 3D Boxes

+    Note:
+        The box is bottom centered, i.e. the relative position of origin in
+        the box is [0.5, 0.5, 0].
+
     Args:
-        tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
+        tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
         box_dim (int): number of the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw).
-        with_yaw (bool): if True, the value of yaw will be
-            set to 0 as minmax boxes.
+            Default to 7.
+        with_yaw (bool): Whether the box is with yaw rotation.
+            If False, the value of yaw will be set to 0 as minmax boxes.
+            Default to True.
+        origin (tuple): The relative position of origin in the box.
+            Default to [0.5, 0.5, 0]. This will guide the box be converted to
+            [0.5, 0.5, 0] mode.
     """

-    def __init__(self, tensor, box_dim=7, with_yaw=True):
+    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=[0.5, 0.5, 0]):
         if isinstance(tensor, torch.Tensor):
             device = tensor.device
         else:
@@ -41,6 +50,11 @@ class BaseInstance3DBoxes(object):
         self.with_yaw = with_yaw
         self.tensor = tensor

+        if origin != [0.5, 0.5, 0]:
+            dst = self.tensor.new_tensor([0.5, 0.5, 0])
+            src = self.tensor.new_tensor(origin)
+            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
+
     @property
     def volume(self):
         """Computes the volume of all the boxes.
@@ -61,12 +75,21 @@ class BaseInstance3DBoxes(object):
         """
         return self.tensor[:, 3:6]

+    @property
+    def yaw(self):
+        """Obtain the rotation of all the boxes.
+
+        Returns:
+            torch.Tensor: a vector with yaw of each box.
+        """
+        return self.tensor[:, 6]
+
     @property
     def height(self):
         """Obtain the height of all the boxes.

         Returns:
-            torch.Tensor: a vector with volume of each box.
+            torch.Tensor: a vector with height of each box.
         """
         return self.tensor[:, 5]
@@ -438,7 +461,8 @@ class BaseInstance3DBoxes(object):
             BaseInstance3DBoxes: A new bbox with data and other
                 properties are similar to self.
         """
-        new_tensor = self.tensor.new_tensor(data)
+        new_tensor = self.tensor.new_tensor(data) \
+            if not isinstance(data, torch.Tensor) else data.to(self.device)
         original_type = type(self)
         return original_type(
             new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
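The origin handling in __init__ is a pure translation of the box centers. A minimal standalone sketch (assuming only torch, with made-up numbers) of converting a gravity-centered box to the bottom-centered convention:

import torch

# One box as (x, y, z, x_size, y_size, z_size, yaw), z at the gravity center.
boxes = torch.tensor([[0.0, 0.0, 1.0, 2.0, 4.0, 1.5, 0.0]])
src = torch.tensor([0.5, 0.5, 0.5])  # origin the data was annotated with
dst = torch.tensor([0.5, 0.5, 0.0])  # bottom-centered convention of the class
boxes[:, :3] += boxes[:, 3:6] * (dst - src)
# z becomes 1.0 - 1.5 * 0.5 = 0.25, i.e. the bottom-center height.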
mmdet3d/core/bbox/structures/cam_box3d.py

@@ -29,8 +29,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
         with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
             boxes.
     """

     @property

mmdet3d/core/bbox/structures/depth_box3d.py

@@ -26,8 +26,8 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
         with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
             boxes.
     """

     @property
mmdet3d/core/bbox/structures/lidar_box3d.py

 import numpy as np
 import torch

+from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from .base_box3d import BaseInstance3DBoxes
 from .utils import limit_period, rotation_3d_in_axis
@@ -26,8 +27,8 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
         with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
             boxes.
     """

     @property
@@ -196,3 +197,32 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         from .box_3d_mode import Box3DMode
         return Box3DMode.convert(
             box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
+
+    def enlarged_box(self, extra_width):
+        """Enlarge the length, width and height boxes
+
+        Args:
+            extra_width (float | torch.Tensor): extra width to enlarge the box
+
+        Returns:
+            :obj:`LiDARInstance3DBoxes`: enlarged boxes
+        """
+        enlarged_boxes = self.tensor.clone()
+        enlarged_boxes[:, 3:6] += extra_width * 2
+        # bottom center z minus extra_width
+        enlarged_boxes[:, 2] -= extra_width
+        return self.new_box(enlarged_boxes)
+
+    def points_in_boxes(self, points):
+        """Find the box which the points are in.
+
+        Args:
+            points (:obj:`torch.Tensor`): Points in shape Nx3
+
+        Returns:
+            torch.Tensor: The index of box where each point are in.
+        """
+        box_idx = points_in_boxes_gpu(
+            points.unsqueeze(0),
+            self.tensor.unsqueeze(0).to(points.device)).squeeze(0)
+        return box_idx
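A usage sketch of the two new helpers (the underlying points_in_boxes_gpu op is CUDA-only, so this assumes a GPU; the box values are copied from the unit test further down):

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197,
                   -0.9091]]))

# Grow every box by 0.2 m per side; z drops by 0.2 so the bottom face
# moves down with the enlargement instead of the box growing upward only.
enlarged = boxes.enlarged_box(0.2)

# Index of the containing box per point, -1 where a point lies in no box.
points = torch.rand(128, 3).cuda()
box_idx = enlarged.points_in_boxes(points)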
mmdet3d/datasets/nuscenes_dataset.py

@@ -7,7 +7,7 @@ import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox

 from mmdet.datasets import DATASETS
-from ..core.bbox import box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
 from .custom_3d import Custom3DDataset
@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset):
         # filter out bbox containing no points
         mask = info['num_lidar_pts'] > 0
         gt_bboxes_3d = info['gt_boxes'][mask]
-        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
-        # the same as KITTI [0.5, 0.5, 0]
-        box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
-                                        [0.5, 0.5, 0])
         gt_names_3d = info['gt_names'][mask]
         gt_labels_3d = []
         for cat in gt_names_3d:
@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset):
             gt_velocity[nan_mask] = [0.0, 0.0]
             gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)

+        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
+        # the same as KITTI [0.5, 0.5, 0]
+        gt_bboxes_3d = LiDARInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=[0.5, 0.5, 0.5])
+
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
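In other words, the explicit in-place call to box_np_ops.change_box3d_center_ is replaced by the new origin argument of the box structure, which performs the same shift at construction time. A small sketch of the equivalence, with a made-up one-box array:

import numpy as np
from mmdet3d.core.bbox import LiDARInstance3DBoxes

gt_boxes = np.array([[0.0, 0.0, 1.0, 2.0, 4.0, 1.5, 0.0]], dtype=np.float32)
boxes = LiDARInstance3DBoxes(
    gt_boxes, box_dim=gt_boxes.shape[-1], origin=[0.5, 0.5, 0.5])
# boxes.tensor[0, 2] is now 0.25: the gravity-center z minus half the
# height, matching what change_box3d_center_ produced before.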
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py

@@ -5,7 +5,6 @@ import torch.nn.functional as F
 from mmdet3d.core import multi_apply
 from mmdet3d.core.bbox import box_torch_ops
 from mmdet3d.models.builder import build_loss
-from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from mmdet.models import HEADS
@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
     """Semantic segmentation head for point-wise segmentation.

     Predict point-wise segmentation and part regression results for PartA2.
-    See https://arxiv.org/abs/1907.03670 for more detials.
+    See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.

     Args:
         in_channels (int): the number of input channel.
@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module):
             seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)

     def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
         """generate segmentation and part prediction targets
+        for a single sample

         Args:
             voxel_centers (torch.Tensor): shape [voxel_num, 3],
                 the center of voxels
-            gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes containing
+                tensor of shape [box_num, 7].
             gt_labels_3d (torch.Tensor): shape [box_num], class label of gt

         Returns:
             tuple : segmentation targets with shape [voxel_num]
                 part prediction targets with shape [voxel_num, 3]
         """
-        enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar(
-            gt_bboxes_3d, extra_width=self.extra_width)
+        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
+        enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)
+
         part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
                                                dtype=torch.float32)
-        box_idx = points_in_boxes_gpu(
-            voxel_centers.unsqueeze(0),
-            gt_bboxes_3d.unsqueeze(0)).squeeze(0)  # -1 ~ box_num
-        enlarge_box_idx = points_in_boxes_gpu(
-            voxel_centers.unsqueeze(0),
-            enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long()  # -1 ~ box_num
+        box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
+        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
+            voxel_centers).long()  # -1 ~ box_num

         gt_labels_pad = F.pad(
             gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module):
         ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
         seg_targets[ignore_flag] = -1

-        for k in range(gt_bboxes_3d.shape[0]):
+        for k in range(len(gt_bboxes_3d)):
             k_box_flag = box_idx == k
             # no point in current box (caused by velodyne reduce)
             if not k_box_flag.any():
                 continue
             fg_voxels = voxel_centers[k_box_flag]
-            transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3]
+            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
             transformed_voxels = box_torch_ops.rotation_3d_in_axis(
                 transformed_voxels.unsqueeze(0),
-                -gt_bboxes_3d[k, 6].view(1),
+                -gt_bboxes_3d.yaw[k].view(1),
                 axis=2)
-            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[
-                k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0])
+            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
+                k] + voxel_centers.new_tensor([0.5, 0.5, 0])
         part_targets = torch.clamp(part_targets, min=0)
         return seg_targets, part_targets

     def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
+        """generate segmentation and part prediction targets
+
+        Args:
+            voxel_centers (torch.Tensor): shape [voxel_num, 3],
+                the center of voxels
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes
+                containing tensor of shape [box_num, 7].
+            gt_labels_3d (list[torch.Tensor]): list of GT labels.
+
+        Returns:
+            tuple : segmentation targets with shape [voxel_num]
+                part prediction targets with shape [voxel_num, 3]
+        """
         batch_size = len(gt_labels_3d)
         voxel_center_list = []
         for idx in range(batch_size):
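The part target per foreground voxel is its position in the box's canonical frame: translate by the bottom center, rotate by -yaw about z, divide by the dims, then shift so the bottom center lands at (0.5, 0.5, 0). A standalone sketch of that formula (the rotation sign convention of rotation_3d_in_axis is assumed, so treat this as illustrative only):

import torch

def part_target(voxel_xyz, bottom_center, dims, yaw):
    # Translate voxel centers (M, 3) into the box frame.
    local = voxel_xyz - bottom_center
    # Rotate by -yaw about the z axis (sign convention assumed).
    c, s = torch.cos(-yaw), torch.sin(-yaw)
    x0, y0 = local[:, 0], local[:, 1]
    local = torch.stack([x0 * c - y0 * s, x0 * s + y0 * c, local[:, 2]], dim=1)
    # Normalize by the box dims and re-center; clamp as the head does.
    return (local / dims + local.new_tensor([0.5, 0.5, 0.0])).clamp(min=0)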
mmdet3d/models/roi_heads/part_aggregation_roi_head.py

@@ -8,7 +8,7 @@ from ..builder import build_head, build_roi_extractor
 from .base_3droi_head import Base3DRoIHead

-@HEADS.register_module
+@HEADS.register_module()
 class PartAggregationROIHead(Base3DRoIHead):
     """Part aggregation roi head for PartA2"""
@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead):
             cur_proposal_list = proposal_list[batch_idx]
             cur_boxes = cur_proposal_list['boxes_3d']
             cur_labels_3d = cur_proposal_list['labels_3d']
-            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
+            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
             cur_gt_labels = gt_labels_3d[batch_idx]
             batch_num_gts = 0
@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead):
                     pred_per_cls = (cur_labels_3d == i)
                     cur_assign_res = assigner.assign(
                         cur_boxes[pred_per_cls],
-                        cur_gt_bboxes[gt_per_cls],
+                        cur_gt_bboxes.tensor[gt_per_cls],
                         gt_labels=cur_gt_labels[gt_per_cls])
                     # gather assign_results in different class into one result
                     batch_num_gts += cur_assign_res.num_gts
@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead):
                     batch_gt_labels)
             else:  # for single class
                 assign_result = self.bbox_assigner.assign(
-                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
+                    cur_boxes, cur_gt_bboxes.tensor, gt_labels=cur_gt_labels)
             # sample boxes
             sampling_result = self.bbox_sampler.sample(assign_result,
                                                        cur_boxes,
-                                                       cur_gt_bboxes,
+                                                       cur_gt_bboxes.tensor,
                                                        cur_gt_labels)
             sampling_results.append(sampling_result)
         return sampling_results
tests/test_box3d.py

@@ -13,6 +13,46 @@ def test_lidar_boxes3d():
     assert boxes.tensor.shape[0] == 0
     assert boxes.tensor.shape[1] == 7

+    # Test init with origin
+    gravity_center_box = np.array(
+        [[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,
+          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
+         [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,
+          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
+         [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,
+          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
+         [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,
+          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]],
+        dtype=np.float32)
+    bottom_center_box = LiDARInstance3DBoxes(
+        gravity_center_box, origin=[0.5, 0.5, 0.5])
+    expected_tensor = torch.tensor(
+        [[-5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
+          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
+         [-2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,
+          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
+         [-5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,
+          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
+         [-3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,
+          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]])
+    assert torch.allclose(expected_tensor, bottom_center_box.tensor)
+
     # Test init with numpy array
     np_boxes = np.array(
         [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
@@ -313,6 +353,8 @@ def test_boxes_conversion():
          [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
     cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
                                        Box3DMode.CAM)
+    expected_box = lidar_boxes.convert_to(Box3DMode.CAM)
+    assert torch.equal(expected_box.tensor, cam_box_tensor)

     # Some properties should be the same
     cam_boxes = CameraInstance3DBoxes(cam_box_tensor)
@@ -342,7 +384,7 @@ def test_boxes_conversion():
     # test similar mode conversion
     same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
                                      Box3DMode.DEPTH)
-    assert (same_results == depth_box_tensor).all()
+    assert torch.equal(same_results, depth_box_tensor)

     # test conversion with a given rt_mat
     camera_boxes = CameraInstance3DBoxes(
@@ -418,8 +460,10 @@ def test_boxes_conversion():
          [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
         dtype=torch.float32)
     depth_boxes = DepthInstance3DBoxes(depth_boxes)
-    depth_to_lidar_box = Box3DMode.convert(depth_boxes, Box3DMode.DEPTH,
-                                           Box3DMode.LIDAR)
+    depth_to_lidar_box = depth_boxes.convert_to(Box3DMode.LIDAR)
+    expected_box = depth_to_lidar_box.convert_to(Box3DMode.DEPTH)
+    assert torch.equal(depth_boxes.tensor, expected_box.tensor)
+
     lidar_to_depth_box = Box3DMode.convert(depth_to_lidar_box, Box3DMode.LIDAR,
                                            Box3DMode.DEPTH)
     assert torch.allclose(depth_boxes.tensor, lidar_to_depth_box.tensor)
@@ -430,6 +474,8 @@ def test_boxes_conversion():
                                         Box3DMode.CAM)
     cam_to_depth_box = Box3DMode.convert(depth_to_cam_box, Box3DMode.CAM,
                                          Box3DMode.DEPTH)
+    expected_tensor = depth_to_cam_box.convert_to(Box3DMode.DEPTH)
+    assert torch.equal(expected_tensor.tensor, cam_to_depth_box.tensor)
     assert torch.allclose(depth_boxes.tensor, cam_to_depth_box.tensor)
     assert torch.allclose(depth_boxes.volume, cam_to_depth_box.volume)
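The new origin test can be verified by hand from the formula in base_box3d.py: only z changes, by minus half the z_size. For the first box:

# First row of gravity_center_box: z = 2.97570381e-01, z_size = 1.548.
z_bottom = 2.97570381e-01 - 1.548 / 2
assert abs(z_bottom - (-4.76429619e-01)) < 1e-9  # matches expected_tensor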
tests/test_pipeline/test_outdoor_pipeline.py

@@ -5,7 +5,7 @@ from mmdet3d.core.bbox import LiDARInstance3DBoxes
 from mmdet3d.datasets.pipelines import Compose


-def test_outdoor_pipeline():
+def test_outdoor_aug_pipeline():
     point_cloud_range = [0, -40, -3, 70.4, 40, 1]
     class_names = ['Car']
     np.random.seed(0)
@@ -119,3 +119,124 @@ def test_outdoor_aug_pipeline():
          [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])
     assert torch.allclose(
         output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
+
+
+def test_outdoor_velocity_aug_pipeline():
+    point_cloud_range = [-50, -50, -5, 50, 50, 3]
+    class_names = [
+        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+    ]
+    np.random.seed(0)
+    train_pipeline = [
+        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+        dict(
+            type='GlobalRotScale',
+            rot_uniform_noise=[-0.3925, 0.3925],
+            scaling_uniform_noise=[0.95, 1.05],
+            trans_normal_noise=[0, 0, 0]),
+        dict(type='RandomFlip3D', flip_ratio=0.5),
+        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+        dict(type='PointShuffle'),
+        dict(type='DefaultFormatBundle3D', class_names=class_names),
+        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    ]
+    pipeline = Compose(train_pipeline)
+
+    gt_bboxes_3d = LiDARInstance3DBoxes(
+        torch.tensor(
+            [[-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
+              1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02],
+             [-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
+              7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03],
+             [-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
+              1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00],
+             [-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
+              1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02],
+             [-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
+              1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00],
+             [-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
+              1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00],
+             [-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
+              2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01],
+             [-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
+              1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00],
+             [-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
+              1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00],
+             [-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
+              1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03]],
+            dtype=torch.float32),
+        box_dim=9)
+    gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
+    results = dict(
+        pts_filename='tests/data/kitti/a.bin',
+        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
+        bbox3d_fields=[],
+    )
+
+    output = pipeline(results)
+
+    expected_tensor = torch.tensor(
+        [[-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
+          1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02],
+         [-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
+          7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03],
+         [-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
+          1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00],
+         [-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
+          1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02],
+         [-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
+          1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00],
+         [-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
+          1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00],
+         [-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
+          2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01],
+         [-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
+          1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00],
+         [-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
+          1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03]])
+    assert torch.allclose(
+        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
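Two things in the new test are worth spelling out: the boxes are 9-dimensional (x, y, z, x_size, y_size, z_size, yaw plus the two nuScenes velocity components, hence box_dim=9), and the expected values can be partially sanity-checked by hand, since GlobalRotScale draws a single scale factor per sample:

# With the tensors above in scope: the size columns of every box are
# scaled by the same global factor (about 1.0215 under this seed).
ratio = expected_tensor[0, 3:6] / gt_bboxes_3d.tensor[0, 3:6]
# ratio ~= tensor([1.0215, 1.0215, 1.0215]); the ninth input box
# (x = -65.878) falls outside point_cloud_range and is filtered out,
# which is why only nine boxes remain in expected_tensor.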
tests/test_semantic_heads.py

 import pytest
 import torch

+from mmdet3d.core.bbox import LiDARInstance3DBoxes
+

 def test_PointwiseSemanticHead():
     # PointwiseSemanticHead only support gpu version currently.
@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead():
          [1, 35, 930, 469]],
         dtype=torch.int32).cuda()  # n, 4(batch, ind_x, ind_y, ind_z)
     voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)
-    gt_bboxes = list(
-        torch.tensor(
-            [[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
-             [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
-            dtype=torch.float32).cuda())
+    gt_bboxes = [
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
+                dtype=torch.float32).cuda()),
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],
+                dtype=torch.float32).cuda())
+    ]  # batch size is 2 in the unit test
     gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())

     # test get_targets
     target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)
     assert target_dict['seg_targets'].shape == torch.Size(
         [voxel_features.shape[0]])
+    assert torch.allclose(target_dict['seg_targets'],
+                          target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))
     assert target_dict['part_targets'].shape == torch.Size(
         [voxel_features.shape[0], 3])
+    assert target_dict['part_targets'].sum() == 0
     # test loss
     loss_dict = self.loss(feats_dict, target_dict)
@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead():
     assert loss_dict['loss_part'] == 0  # no points in gt_boxes
     total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
     total_loss.backward()
-
-
-if __name__ == '__main__':
-    test_PointwiseSemanticHead()