[Enhance] Use Points structure in augmentation and models (#204)

* add h3d backbone * add h3d backbone * add h3dnet * modify scannet config * fix bugs for proposal refine * fix bugs for test backbone * add primitive head test * modify h3dhead * modify h3d head * update loss weight config * fix bugs for h3d head loss * modify h3d head get targets function * update h3dnet base config * modify weighted loss * Revert "Merge branch 'h3d_u2' into 'master'" This reverts merge request !5 * modify pipeline * modify kitti pipeline * fix bugs for points rotation * modify multi sweeps * modify multi sweep points * fix bugs for points slicing * modify BackgroundPointsFilter * modify pipeline * modify unittest * modify unittest * modify docstring * modify config files * update configs * modify docstring

[Enhance] Use Points structure in augmentation and models (#204)
* add h3d backbone * add h3d backbone * add h3dnet * modify scannet config * fix bugs for proposal refine * fix bugs for test backbone * add primitive head test * modify h3dhead * modify h3d head * update loss weight config * fix bugs for h3d head loss * modify h3d head get targets function * update h3dnet base config * modify weighted loss * Revert "Merge branch 'h3d_u2' into 'master'" This reverts merge request !5 * modify pipeline * modify kitti pipeline * fix bugs for points rotation * modify multi sweeps * modify multi sweep points * fix bugs for points slicing * modify BackgroundPointsFilter * modify pipeline * modify unittest * modify unittest * modify docstring * modify config files * update configs * modify docstring
23768cba · encore-zhou · GitHub · a97fc87b · 23768cba · 23768cba
Unverified Commit 23768cba authored Nov 28, 2020 by encore-zhou Committed by GitHub Nov 28, 2020
16 changed files
--- a/mmdet3d/datasets/pipelines/loading.py
+++ b/mmdet3d/datasets/pipelines/loading.py
 import mmcv
 import numpy as np

+from mmdet3d.core.points import BasePoints, get_points_type
 from mmdet.datasets.builder import PIPELINES
 from mmdet.datasets.pipelines import LoadAnnotations

@@ -136,10 +137,16 @@ class LoadPointsFromMultiSweeps(object):
        Returns:
            np.ndarray: Points after removing.
        """
-        x_filt = np.abs(points[:, 0]) < radius
-        y_filt = np.abs(points[:, 1]) < radius
+        if isinstance(points, np.ndarray):
+            points_numpy = points
+        elif isinstance(points, BasePoints):
+            points_numpy = points.tensor.numpy()
+        else:
+            raise NotImplementedError
+        x_filt = np.abs(points_numpy[:, 0]) < radius
+        y_filt = np.abs(points_numpy[:, 1]) < radius
        not_close = np.logical_not(np.logical_and(x_filt, y_filt))
-        return points[not_close, :]
+        return points[not_close]

    def __call__(self, results):
        """Call function to load multi-sweep point clouds from files.
@@ -155,7 +162,7 @@ class LoadPointsFromMultiSweeps(object):
                - points (np.ndarray): Multi-sweep point cloud arrays.
        """
        points = results['points']
-        points[:, 4] = 0
+        points.tensor[:, 4] = 0
        sweep_points_list = [points]
        ts = results['timestamp']
        if self.pad_empty_sweeps and len(results['sweeps']) == 0:
@@ -183,9 +190,11 @@ class LoadPointsFromMultiSweeps(object):
                    'sensor2lidar_rotation'].T
                points_sweep[:, :3] += sweep['sensor2lidar_translation']
                points_sweep[:, 4] = ts - sweep_ts
+                points_sweep = points.new_point(points_sweep)
                sweep_points_list.append(points_sweep)

-        points = np.concatenate(sweep_points_list, axis=0)[:, self.use_dim]
+        points = points.cat(sweep_points_list)
+        points = points[:, self.use_dim]
        results['points'] = points
        return results

@@ -287,6 +296,11 @@ class LoadPointsFromFile(object):
    Args:
        load_dim (int): The dimension of the loaded points.
            Defaults to 6.
+        coord_type (str): The coordinate type of the point cloud.
+            Available options include:
+            - 'LIDAR': Points in LiDAR coordinates.
+            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
+            - 'CAMERA': Points in camera coordinates.
        use_dim (list[int]): Which dimensions of the points to be used.
            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
@@ -297,6 +311,7 @@ class LoadPointsFromFile(object):
    """

    def __init__(self,
+                 coord_type,
                 load_dim=6,
                 use_dim=[0, 1, 2],
                 shift_height=False,
@@ -306,7 +321,9 @@ class LoadPointsFromFile(object):
            use_dim = list(range(use_dim))
        assert max(use_dim) < load_dim, \
            f'Expect all used dimensions < {load_dim}, got {use_dim}'
+        assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH']

+        self.coord_type = coord_type
        self.load_dim = load_dim
        self.use_dim = use_dim
        self.file_client_args = file_client_args.copy()
@@ -332,6 +349,7 @@ class LoadPointsFromFile(object):
                points = np.load(pts_filename)
            else:
                points = np.fromfile(pts_filename, dtype=np.float32)
+
        return points

    def __call__(self, results):
@@ -350,12 +368,19 @@ class LoadPointsFromFile(object):
        points = self._load_points(pts_filename)
        points = points.reshape(-1, self.load_dim)
        points = points[:, self.use_dim]
+        attribute_dims = None

        if self.shift_height:
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            points = np.concatenate([points, np.expand_dims(height, 1)], 1)
+            attribute_dims = dict(height=3)
+
+        points_class = get_points_type(self.coord_type)
+        points = points_class(
+            points, points_dim=points.shape[-1], attribute_dims=attribute_dims)
        results['points'] = points
+
        return results

    def __repr__(self):

--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -140,7 +140,7 @@ class ObjectSample(object):
        Returns:
            np.ndarray: Points with those in the boxes removed.
        """
-        masks = box_np_ops.points_in_rbbox(points, boxes)
+        masks = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes)
        points = points[np.logical_not(masks.any(-1))]
        return points

@@ -186,9 +186,7 @@ class ObjectSample(object):

            points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
            # check the points dimension
-            dim_inds = points.shape[-1]
-            points = np.concatenate([sampled_points[:, :dim_inds], points],
-                                    axis=0)
+            points = points.cat([sampled_points, points])

            if self.sample_2d:
                sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
@@ -258,16 +256,18 @@ class ObjectNoise(object):

        # TODO: check this inplace function
        numpy_box = gt_bboxes_3d.tensor.numpy()
+        numpy_points = points.tensor.numpy()
+
        noise_per_object_v3_(
            numpy_box,
-            points,
+            numpy_points,
            rotation_perturb=self.rot_range,
            center_noise_std=self.translation_std,
            global_random_rot_range=self.global_rot_range,
            num_try=self.num_try)

        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
-        input_dict['points'] = points
+        input_dict['points'] = points.new_point(numpy_points)
        return input_dict

    def __repr__(self):
@@ -329,7 +329,7 @@ class GlobalRotScaleTrans(object):
        translation_std = np.array(translation_std, dtype=np.float32)
        trans_factor = np.random.normal(scale=translation_std, size=3).T

-        input_dict['points'][:, :3] += trans_factor
+        input_dict['points'].translate(trans_factor)
        input_dict['pcd_trans'] = trans_factor
        for key in input_dict['bbox3d_fields']:
            input_dict[key].translate(trans_factor)
@@ -356,6 +356,7 @@ class GlobalRotScaleTrans(object):
                    noise_rotation, input_dict['points'])
                input_dict['points'] = points
                input_dict['pcd_rotation'] = rot_mat_T
+        # input_dict['points_instance'].rotate(noise_rotation)

    def _scale_bbox_points(self, input_dict):
        """Private function to scale bounding boxes and points.
@@ -368,9 +369,12 @@ class GlobalRotScaleTrans(object):
                input_dict['bbox3d_fields'] are updated in the result dict.
        """
        scale = input_dict['pcd_scale_factor']
-        input_dict['points'][:, :3] *= scale
+        points = input_dict['points']
+        points.scale(scale)
        if self.shift_height:
-            input_dict['points'][:, -1] *= scale
+            assert 'height' in points.attribute_dims.keys()
+            points.tensor[:, points.attribute_dims['height']] *= scale
+        input_dict['points'] = points

        for key in input_dict['bbox3d_fields']:
            input_dict[key].scale(scale)
@@ -434,7 +438,7 @@ class PointShuffle(object):
            dict: Results after filtering, 'points' keys are updated \
                in the result dict.
        """
-        np.random.shuffle(input_dict['points'])
+        input_dict['points'].shuffle()
        return input_dict

    def __repr__(self):
@@ -496,8 +500,7 @@ class PointsRangeFilter(object):
    """

    def __init__(self, point_cloud_range):
-        self.pcd_range = np.array(
-            point_cloud_range, dtype=np.float32)[np.newaxis, :]
+        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)

    def __call__(self, input_dict):
        """Call function to filter points by the range.
@@ -510,10 +513,8 @@ class PointsRangeFilter(object):
                in the result dict.
        """
        points = input_dict['points']
-        points_mask = ((points[:, :3] >= self.pcd_range[:, :3])
-                       & (points[:, :3] < self.pcd_range[:, 3:]))
-        points_mask = points_mask[:, 0] & points_mask[:, 1] & points_mask[:, 2]
-        clean_points = points[points_mask, :]
+        points_mask = points.in_range_3d(self.pcd_range)
+        clean_points = points[points_mask]
        input_dict['points'] = clean_points
        return input_dict

@@ -619,6 +620,7 @@ class IndoorPointSample(object):
        points = results['points']
        points, choices = self.points_random_sampling(
            points, self.num_points, return_choices=True)
+
        pts_instance_mask = results.get('pts_instance_mask', None)
        pts_semantic_mask = results.get('pts_semantic_mask', None)
        results['points'] = points
@@ -674,9 +676,11 @@ class BackgroundPointsFilter(object):
        gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.numpy()
        enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy()
        enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range
-        foreground_masks = box_np_ops.points_in_rbbox(points, gt_bboxes_3d_np)
+        points_numpy = points.tensor.numpy()
+        foreground_masks = box_np_ops.points_in_rbbox(points_numpy,
+                                                      gt_bboxes_3d_np)
        enlarge_foreground_masks = box_np_ops.points_in_rbbox(
-            points, enlarged_gt_bboxes_3d)
+            points_numpy, enlarged_gt_bboxes_3d)
        foreground_masks = foreground_masks.max(1)
        enlarge_foreground_masks = enlarge_foreground_masks.max(1)
        valid_masks = ~np.logical_and(~foreground_masks,
@@ -770,7 +774,8 @@ class VoxelBasedPointSampler(object):
        # Extend points with seg and mask fields
        map_fields2dim = []
        start_dim = original_dim
-        extra_channel = [points]
+        points_numpy = points.tensor.numpy()
+        extra_channel = [points_numpy]
        for idx, key in enumerate(results['pts_mask_fields']):
            map_fields2dim.append((key, idx + start_dim))
            extra_channel.append(results[key][..., None])
@@ -780,15 +785,15 @@ class VoxelBasedPointSampler(object):
            map_fields2dim.append((key, idx + start_dim))
            extra_channel.append(results[key][..., None])

-        points = np.concatenate(extra_channel, axis=-1)
+        points_numpy = np.concatenate(extra_channel, axis=-1)

        # Split points into two part, current sweep points and
        # previous sweeps points.
        # TODO: support different sampling methods for next sweeps points
        # and previous sweeps points.
-        cur_points_flag = (points[:, self.time_dim] == 0)
-        cur_sweep_points = points[cur_points_flag]
-        prev_sweeps_points = points[~cur_points_flag]
+        cur_points_flag = (points_numpy[:, self.time_dim] == 0)
+        cur_sweep_points = points_numpy[cur_points_flag]
+        prev_sweeps_points = points_numpy[~cur_points_flag]
        if prev_sweeps_points.shape[0] == 0:
            prev_sweeps_points = cur_sweep_points

@@ -798,23 +803,24 @@ class VoxelBasedPointSampler(object):

        cur_sweep_points = self._sample_points(cur_sweep_points,
                                               self.cur_voxel_generator,
-                                               points.shape[1])
+                                               points_numpy.shape[1])
        if self.prev_voxel_generator is not None:
            prev_sweeps_points = self._sample_points(prev_sweeps_points,
                                                     self.prev_voxel_generator,
-                                                     points.shape[1])
+                                                     points_numpy.shape[1])

-            points = np.concatenate([cur_sweep_points, prev_sweeps_points], 0)
+            points_numpy = np.concatenate(
+                [cur_sweep_points, prev_sweeps_points], 0)
        else:
-            points = cur_sweep_points
+            points_numpy = cur_sweep_points

        if self.cur_voxel_generator._max_num_points == 1:
-            points = points.squeeze(1)
-        results['points'] = points[..., :original_dim]
+            points_numpy = points_numpy.squeeze(1)
+        results['points'] = points.new_point(points_numpy[..., :original_dim])

        # Restore the correspoinding seg and mask fields
        for key, dim_index in map_fields2dim:
-            results[key] = points[..., dim_index]
+            results[key] = points_numpy[..., dim_index]

        return results


--- a/tests/test_dataset/test_dataset_wrappers.py
+++ b/tests/test_dataset/test_dataset_wrappers.py
 import numpy as np
+import torch

 from mmdet3d.datasets.builder import build_dataset


 def test_getitem():
-    np.random.seed(0)
+    np.random.seed(1)
+    torch.manual_seed(1)
    point_cloud_range = [-50, -50, -5, 50, 50, 3]
    file_client_args = dict(backend='disk')
    class_names = [
@@ -14,6 +16,7 @@ def test_getitem():
    pipeline = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=file_client_args),
@@ -63,12 +66,13 @@ def test_getitem():
            box_type_3d='LiDAR'))
    nus_dataset = build_dataset(dataset_cfg)
    assert len(nus_dataset) == 20
-    data = nus_dataset[0]
-    assert data['img_metas'].data['flip'] is False
-    assert data['img_metas'].data['pcd_horizontal_flip'] is False
-    assert data['points']._data.shape == (901, 5)

-    data = nus_dataset[1]
+    data = nus_dataset[0]
    assert data['img_metas'].data['flip'] is True
    assert data['img_metas'].data['pcd_horizontal_flip'] is True
    assert data['points']._data.shape == (537, 5)
+
+    data = nus_dataset[2]
+    assert data['img_metas'].data['flip'] is False
+    assert data['img_metas'].data['pcd_horizontal_flip'] is False
+    assert data['points']._data.shape == (901, 5)
--- a/tests/test_dataset/test_kitti_dataset.py
+++ b/tests/test_dataset/test_kitti_dataset.py
@@ -14,6 +14,7 @@ def test_getitem():
    pts_prefix = 'velodyne_reduced'
    pipeline = [{
        'type': 'LoadPointsFromFile',
+        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'file_client_args': {
@@ -97,6 +98,7 @@ def test_evaluate():
    pts_prefix = 'velodyne_reduced'
    pipeline = [{
        'type': 'LoadPointsFromFile',
+        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'file_client_args': {
@@ -172,6 +174,7 @@ def test_show():
    pipeline = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='LIDAR',
            load_dim=4,
            use_dim=4,
            file_client_args=file_client_args),
@@ -227,6 +230,7 @@ def test_format_results():
    pts_prefix = 'velodyne_reduced'
    pipeline = [{
        'type': 'LoadPointsFromFile',
+        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'file_client_args': {
@@ -309,6 +313,7 @@ def test_bbox2result_kitti2d():
    pts_prefix = 'velodyne_reduced'
    pipeline = [{
        'type': 'LoadPointsFromFile',
+        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'file_client_args': {

--- a/tests/test_dataset/test_lyft_dataset.py
+++ b/tests/test_dataset/test_lyft_dataset.py
@@ -7,6 +7,7 @@ from mmdet3d.datasets import LyftDataset

 def test_getitem():
    np.random.seed(0)
+    torch.manual_seed(0)
    root_path = './tests/data/lyft'
    ann_file = './tests/data/lyft/lyft_infos.pkl'
    class_names = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
@@ -15,6 +16,7 @@ def test_getitem():
    pipelines = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=dict(backend='disk')),
@@ -56,27 +58,28 @@ def test_getitem():
    assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)
    assert sample_idx == \
        'b98a05255ba2632e957884758cb31f0e6fcc8d3cd6ee76b6d0ba55b72f08fc54'
-    expected_points = torch.tensor([[59.1695, -1.2910, 7.0296, 0.2000],
-                                    [52.4867, -4.0315, 6.7057, 0.0000],
-                                    [52.5683, -4.2178, 6.7179, 0.0000],
+    expected_points = torch.tensor([[61.4785, -3.7393, 6.7699, 0.4001],
                                    [47.7904, -3.9887, 6.0926, 0.0000],
-                                    [59.8226, -1.5522, 6.5867, 0.4001],
-                                    [53.0842, -3.7064, 6.7811, 0.0000],
-                                    [49.9896, -4.5202, 5.8823, 0.2000],
+                                    [52.5683, -4.2178, 6.7179, 0.0000],
+                                    [52.4867, -4.0315, 6.7057, 0.0000],
                                    [59.8372, -1.7366, 6.5864, 0.4001],
-                                    [61.4597, -4.6402, 7.3340, 0.2000],
-                                    [61.4785, -3.7393, 6.7699, 0.4001],
-                                    [53.0702, -3.8868, 6.7807, 0.0000],
-                                    [59.8244, -1.3499, 6.5895, 0.4001],
+                                    [53.0842, -3.7064, 6.7811, 0.0000],
                                    [60.5549, -3.4978, 6.6578, 0.4001],
+                                    [59.1695, -1.2910, 7.0296, 0.2000],
+                                    [53.0702, -3.8868, 6.7807, 0.0000],
                                    [47.9579, -4.1648, 5.6219, 0.2000],
-                                    [61.2858, -4.2254, 7.3089, 0.2000]])
+                                    [59.8226, -1.5522, 6.5867, 0.4001],
+                                    [61.2858, -4.2254, 7.3089, 0.2000],
+                                    [49.9896, -4.5202, 5.8823, 0.2000],
+                                    [61.4597, -4.6402, 7.3340, 0.2000],
+                                    [59.8244, -1.3499, 6.5895, 0.4001]])
    expected_gt_bboxes_3d = torch.tensor(
        [[63.2257, 17.5206, -0.6307, 2.0109, 5.1652, 1.9471, -1.5868],
         [-25.3804, 27.4598, -2.3297, 2.7412, 8.4792, 3.4343, -1.5939],
         [-15.2098, -7.0109, -2.2566, 0.7931, 0.8410, 1.7916, 1.5090]])
    expected_gt_labels = np.array([0, 4, 7])
    original_classes = lyft_dataset.CLASSES
+
    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)

--- a/tests/test_dataset/test_nuscene_dataset.py
+++ b/tests/test_dataset/test_nuscene_dataset.py
@@ -14,6 +14,7 @@ def test_getitem():
    pipeline = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=file_client_args),

--- a/tests/test_dataset/test_scannet_dataset.py
+++ b/tests/test_dataset/test_scannet_dataset.py
@@ -16,6 +16,7 @@ def test_getitem():
    pipelines = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),

--- a/tests/test_dataset/test_sunrgbd_dataset.py
+++ b/tests/test_dataset/test_sunrgbd_dataset.py
@@ -14,6 +14,7 @@ def test_getitem():
    pipelines = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),

--- a/tests/test_pipeline/test_indoor_pipeline.py
+++ b/tests/test_pipeline/test_indoor_pipeline.py
@@ -17,6 +17,7 @@ def test_scannet_pipeline():
    pipelines = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
@@ -110,6 +111,7 @@ def test_sunrgbd_pipeline():
    pipelines = [
        dict(
            type='LoadPointsFromFile',
+            coord_type='DEPTH',
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),

--- a/tests/test_pipeline/test_indoor_sample.py
+++ b/tests/test_pipeline/test_indoor_sample.py
 import numpy as np

+from mmdet3d.core.points import DepthPoints
 from mmdet3d.datasets.pipelines import IndoorPointSample


@@ -17,14 +18,15 @@ def test_indoor_sample():
                               [1.1188195, -0.99211365, 2.5551798, 2.6340485],
                               [-0.9186557, -1.7041215, 2.0562649, 2.1351335],
                               [-1.0128691, -1.3394243, 0.040936, 0.1198047]])
-    scannet_results['points'] = scannet_points
+    scannet_results['points'] = DepthPoints(
+        scannet_points, points_dim=4, attribute_dims=dict(height=3))
    scannet_pts_instance_mask = np.array(
        [15, 12, 11, 38, 0, 18, 17, 12, 17, 0])
    scannet_results['pts_instance_mask'] = scannet_pts_instance_mask
    scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])
    scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
    scannet_results = scannet_sample_points(scannet_results)
-    scannet_points_result = scannet_results['points']
+    scannet_points_result = scannet_results['points'].tensor.numpy()
    scannet_instance_labels_result = scannet_results['pts_instance_mask']
    scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
    scannet_choices = np.array([2, 8, 4, 9, 1])
@@ -48,10 +50,11 @@ def test_indoor_sample():
         [-0.74624217, 1.5244724, -0.8678476, 0.41810507],
         [0.56485355, 1.5747732, -0.804522, 0.4814307],
         [-0.0913099, 1.3673826, -1.2800645, 0.00588822]])
-    sunrgbd_results['points'] = sunrgbd_point_cloud
+    sunrgbd_results['points'] = DepthPoints(
+        sunrgbd_point_cloud, points_dim=4, attribute_dims=dict(height=3))
    sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)
    sunrgbd_choices = np.array([2, 8, 4, 9, 1])
-    sunrgbd_points_result = sunrgbd_results['points']
+    sunrgbd_points_result = sunrgbd_results['points'].tensor.numpy()
    repr_str = repr(sunrgbd_sample_points)
    expected_repr_str = 'IndoorPointSample(num_points=5)'
    assert repr_str == expected_repr_str

--- a/tests/test_pipeline/test_load_points_from_multi_sweeps.py
+++ b/tests/test_pipeline/test_load_points_from_multi_sweeps.py
 import numpy as np

+from mmdet3d.core.points import LiDARPoints
 from mmdet3d.datasets.pipelines.loading import LoadPointsFromMultiSweeps


@@ -28,14 +29,14 @@ def test_load_points_from_multi_sweeps():
        test_mode=True)

    points = np.random.random([100, 5]) * 2
-
+    points = LiDARPoints(points, points_dim=5)
    input_results = dict(points=points, sweeps=[], timestamp=None)
    results = load_points_from_multi_sweeps_1(input_results)
-    assert results['points'].shape == (100, 5)
+    assert results['points'].tensor.numpy().shape == (100, 5)

    input_results = dict(points=points, sweeps=[], timestamp=None)
    results = load_points_from_multi_sweeps_2(input_results)
-    assert results['points'].shape == (775, 5)
+    assert results['points'].tensor.numpy().shape == (775, 5)

    sensor2lidar_rotation = np.array(
        [[9.99999967e-01, 1.13183067e-05, 2.56845368e-04],
@@ -52,16 +53,16 @@ def test_load_points_from_multi_sweeps():

    input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)
    results = load_points_from_multi_sweeps_1(input_results)
-    assert results['points'].shape == (500, 5)
+    assert results['points'].tensor.numpy().shape == (500, 5)

    input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)
    results = load_points_from_multi_sweeps_2(input_results)
-    assert results['points'].shape == (451, 5)
+    assert results['points'].tensor.numpy().shape == (451, 5)

    input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)
    results = load_points_from_multi_sweeps_2(input_results)
-    assert results['points'].shape == (3259, 5)
+    assert results['points'].tensor.numpy().shape == (3259, 5)

    input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)
    results = load_points_from_multi_sweeps_3(input_results)
-    assert results['points'].shape == (3259, 5)
+    assert results['points'].tensor.numpy().shape == (3259, 5)
--- a/tests/test_pipeline/test_loading.py
+++ b/tests/test_pipeline/test_loading.py
@@ -4,24 +4,27 @@ import pytest
 from os import path as osp

 from mmdet3d.core.bbox import DepthInstance3DBoxes
+from mmdet3d.core.points import LiDARPoints
 from mmdet3d.datasets.pipelines import (LoadAnnotations3D, LoadPointsFromFile,
                                        LoadPointsFromMultiSweeps)


 def test_load_points_from_indoor_file():
    sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
-    sunrgbd_load_points_from_file = LoadPointsFromFile(6, shift_height=True)
+    sunrgbd_load_points_from_file = LoadPointsFromFile(
+        coord_type='DEPTH', load_dim=6, shift_height=True)
    sunrgbd_results = dict()
    data_path = './tests/data/sunrgbd'
    sunrgbd_info = sunrgbd_info[0]
    sunrgbd_results['pts_filename'] = osp.join(data_path,
                                               sunrgbd_info['pts_path'])
    sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
-    sunrgbd_point_cloud = sunrgbd_results['points']
+    sunrgbd_point_cloud = sunrgbd_results['points'].tensor.numpy()
    assert sunrgbd_point_cloud.shape == (100, 4)

    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
-    scannet_load_data = LoadPointsFromFile(shift_height=True)
+    scannet_load_data = LoadPointsFromFile(
+        coord_type='DEPTH', shift_height=True)
    scannet_results = dict()
    data_path = './tests/data/scannet'
    scannet_info = scannet_info[0]
@@ -29,7 +32,7 @@ def test_load_points_from_indoor_file():
    scannet_results['pts_filename'] = osp.join(data_path,
                                               scannet_info['pts_path'])
    scannet_results = scannet_load_data(scannet_results)
-    scannet_point_cloud = scannet_results['points']
+    scannet_point_cloud = scannet_results['points'].tensor.numpy()
    repr_str = repr(scannet_load_data)
    expected_repr_str = 'LoadPointsFromFile(shift_height=True, ' \
                        'file_client_args={\'backend\': \'disk\'}), ' \
@@ -40,25 +43,27 @@ def test_load_points_from_indoor_file():

 def test_load_points_from_outdoor_file():
    data_path = 'tests/data/kitti/a.bin'
-    load_points_from_file = LoadPointsFromFile(4, 4)
+    load_points_from_file = LoadPointsFromFile(
+        coord_type='LIDAR', load_dim=4, use_dim=4)
    results = dict()
    results['pts_filename'] = data_path
    results = load_points_from_file(results)
-    points = results['points']
+    points = results['points'].tensor.numpy()
    assert points.shape == (50, 4)
    assert np.allclose(points.sum(), 2637.479)

-    load_points_from_file = LoadPointsFromFile(4, [0, 1, 2, 3])
+    load_points_from_file = LoadPointsFromFile(
+        coord_type='LIDAR', load_dim=4, use_dim=[0, 1, 2, 3])
    results = dict()
    results['pts_filename'] = data_path
    results = load_points_from_file(results)
-    new_points = results['points']
+    new_points = results['points'].tensor.numpy()
    assert new_points.shape == (50, 4)
    assert np.allclose(points.sum(), 2637.479)
    np.equal(points, new_points)

    with pytest.raises(AssertionError):
-        LoadPointsFromFile(4, 5)
+        LoadPointsFromFile(coord_type='LIDAR', load_dim=4, use_dim=5)


 def test_load_annotations3D():
@@ -123,14 +128,14 @@ def test_load_points_from_multi_sweeps():
            [[9.99979347e-01, 3.99870769e-04, 6.41441690e-03],
             [-4.42034222e-04, 9.99978299e-01, 6.57316197e-03],
             [-6.41164929e-03, -6.57586161e-03, 9.99957824e-01]]))
-    results = dict(
-        points=np.array([[1., 2., 3., 4., 5.], [1., 2., 3., 4., 5.],
-                         [1., 2., 3., 4., 5.]]),
-        timestamp=1537290014899034,
-        sweeps=[sweep])
+    points = LiDARPoints(
+        np.array([[1., 2., 3., 4., 5.], [1., 2., 3., 4., 5.],
+                  [1., 2., 3., 4., 5.]]),
+        points_dim=5)
+    results = dict(points=points, timestamp=1537290014899034, sweeps=[sweep])

    results = load_points_from_multi_sweeps(results)
-    points = results['points']
+    points = results['points'].tensor.numpy()
    repr_str = repr(load_points_from_multi_sweeps)
    expected_repr_str = 'LoadPointsFromMultiSweeps(sweeps_num=10)'
    assert repr_str == expected_repr_str

--- a/tests/test_pipeline/test_outdoor_pipeline.py
+++ b/tests/test_pipeline/test_outdoor_pipeline.py
@@ -11,7 +11,11 @@ def test_outdoor_aug_pipeline():
    np.random.seed(0)

    train_pipeline = [
-        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=4,
+            use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='ObjectNoise',
@@ -130,7 +134,11 @@ def test_outdoor_velocity_aug_pipeline():
    np.random.seed(0)

    train_pipeline = [
-        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=4,
+            use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='GlobalRotScaleTrans',

--- a/tests/test_pipeline/test_test_augment_utils.py
+++ b/tests/test_pipeline/test_test_augment_utils.py
 import numpy as np
 import torch

+from mmdet3d.core.points import DepthPoints
 from mmdet3d.datasets.pipelines import MultiScaleFlipAug3D


@@ -42,6 +43,7 @@ def test_multi_scale_flip_aug_3D():
                       [1.5090443, 3.2764456, -1.1913797, 0.02097607],
                       [-1.373904, 3.8711405, 0.8524302, 2.064786],
                       [-1.8139812, 3.538856, -1.0056694, 0.20668638]])
+    points = DepthPoints(points, points_dim=4, attribute_dims=dict(height=3))
    results = dict(
        points=points,
        pts_file_name=pts_file_name,
@@ -53,6 +55,7 @@ def test_multi_scale_flip_aug_3D():
        [[-2.2095, 3.3160, -0.7707, 0.4417], [-1.3739, 3.8711, 0.8524, 2.0648],
         [-1.8140, 3.5389, -1.0057, 0.2067], [0.2040, 1.4268, -1.0504, 0.1620],
         [1.5090, 3.2764, -1.1914, 0.0210]],
-        dtype=torch.float64)
+        dtype=torch.float32)
+
    assert torch.allclose(
        results['points'][0]._data, expected_points, atol=1e-4)
--- a/tests/test_pipeline/test_transforms_3d.py
+++ b/tests/test_pipeline/test_transforms_3d.py
@@ -4,6 +4,7 @@ import pytest
 import torch

 from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
+from mmdet3d.core.points import LiDARPoints
 from mmdet3d.datasets import (BackgroundPointsFilter, ObjectNoise,
                              ObjectSample, RandomFlip3D,
                              VoxelBasedPointSampler)
@@ -32,9 +33,9 @@ def test_remove_points_in_boxes():
         [20.2630, 5.1947, -1.4799, 0.7300, 1.7600, 1.7300, 1.5100],
         [18.2496, 3.1887, -1.6109, 0.5600, 1.6800, 1.7100, 1.5600],
         [7.7396, -4.3245, -1.5801, 0.5600, 1.7900, 1.8000, -0.8300]])
-
+    points = LiDARPoints(points, points_dim=4)
    points = ObjectSample.remove_points_in_boxes(points, boxes)
-    assert points.shape == (10, 4)
+    assert points.tensor.numpy().shape == (10, 4)


 def test_object_sample():
@@ -80,6 +81,7 @@ def test_object_sample():
        else:
            gt_labels.append(-1)
    gt_labels = np.array(gt_labels, dtype=np.long)
+    points = LiDARPoints(points, points_dim=4)
    input_dict = dict(
        points=points, gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels)
    input_dict = object_sample(input_dict)
@@ -96,7 +98,7 @@ def test_object_sample():
                        'classes=[\'Pedestrian\', \'Cyclist\', \'Car\'], ' \
                        'sample_groups={\'Pedestrian\': 6}'
    assert repr_str == expected_repr_str
-    assert points.shape == (800, 4)
+    assert points.tensor.numpy().shape == (800, 4)
    assert gt_bboxes_3d.tensor.shape == (1, 7)
    assert np.all(gt_labels_3d == [0])

@@ -119,6 +121,7 @@ def test_object_noise():
                                  axis=1).astype(np.float32)
    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
        Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
+    points = LiDARPoints(points, points_dim=4)
    input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)
    input_dict = object_noise(input_dict)
    points = input_dict['points']
@@ -132,7 +135,7 @@ def test_object_noise():
                        'rot_range=[-0.15707963267, 0.15707963267])'

    assert repr_str == expected_repr_str
-    assert points.shape == (800, 4)
+    assert points.tensor.numpy().shape == (800, 4)
    assert torch.allclose(gt_bboxes_3d, expected_gt_bboxes_3d, 1e-3)


@@ -159,6 +162,7 @@ def test_random_flip_3d():
             [12.7557, 2.2996, -1.4869, 0.6100, 1.1100, 1.9000, -1.9390],
             [10.6677, 0.8064, -1.5435, 0.7900, 0.9600, 1.7900, 1.0856],
             [5.0903, 5.1004, -1.2694, 0.7100, 1.7000, 1.8300, -1.9136]]))
+    points = LiDARPoints(points, points_dim=4)
    input_dict = dict(
        points=points,
        bbox3d_fields=bbox3d_fields,
@@ -166,7 +170,7 @@ def test_random_flip_3d():
        img_fields=img_fields,
        gt_bboxes_3d=gt_bboxes_3d)
    input_dict = random_flip_3d(input_dict)
-    points = input_dict['points']
+    points = input_dict['points'].tensor.numpy()
    gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor
    expected_points = np.array([[22.7035, -9.3901, -0.2848, 0.0000],
                                [21.9826, -9.1766, -0.2698, 0.0000],
@@ -215,10 +219,11 @@ def test_background_points_filter():
    extra_points[:, 2] += 0.1
    extra_points = torch.cat([extra_points, extra_points.new_zeros(4, 1)], 1)
    points = np.concatenate([points, extra_points.numpy()], 0)
+    points = LiDARPoints(points, points_dim=4)
    input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)
    input_dict = background_points_filter(input_dict)

-    points = input_dict['points']
+    points = input_dict['points'].tensor.numpy()
    repr_str = repr(background_points_filter)
    expected_repr_str = 'BackgroundPointsFilter(bbox_enlarge_range=' \
                        '[[0.5, 2.0, 0.5]])'
@@ -257,7 +262,7 @@ def test_voxel_based_point_filter():

    input_time = np.concatenate([np.zeros([2048, 1]), np.ones([2048, 1])], 0)
    input_points = np.concatenate([points, input_time], 1)
-
+    input_points = LiDARPoints(input_points, points_dim=4)
    input_dict = dict(
        points=input_points, pts_mask_fields=[], pts_seg_fields=[])
    input_dict = voxel_based_points_filter(input_dict)
@@ -283,9 +288,9 @@ def test_voxel_based_point_filter():

    assert repr_str == expected_repr_str
    assert points.shape == (2048, 4)
-    assert (points[:, :3].min(0) <
+    assert (points.tensor[:, :3].min(0)[0].numpy() <
            cur_sweep_cfg['point_cloud_range'][0:3]).sum() == 0
-    assert (points[:, :3].max(0) >
+    assert (points.tensor[:, :3].max(0)[0].numpy() >
            cur_sweep_cfg['point_cloud_range'][3:6]).sum() == 0

    # Test instance mask and semantic mask

--- a/tests/test_points.py
+++ b/tests/test_points.py
@@ -1049,9 +1049,3 @@ def test_depth_points():
                                        3.2690e-01
                                    ]])
    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-4)
-
-
-test_base_points()
-test_cam_points()
-test_depth_points()
-test_lidar_points()