working on spconv 2.2

0da847e2 · yan.yan · bf011c76 · fe4a2e61 · 0da847e2 · 0da847e2
Commit 0da847e2 authored Nov 24, 2021 by yan.yan
7 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
 # Changelog

+## [2.1.12] - 2021-11-23
+### Added 
+- Add a method for voxel generator to get pc_voxel_id, which is usually used in semantic segmentation
+### Fixed
+- Fix a bug in the cuda voxel generator when max_voxels is smaller than the real number of voxels
+
+## [2.1.11] - 2021-11-22
+### Fixed
+- Fixed a bug in Volta kernels (TITAN V, Tesla V100): backward weight kernels used f16 as the accumulator, but f32 should be used.
+- Fixed a corner case when the user uses kernel size = 1x1 but stride != 1.
+- Fixed a corner case in maxpool when the input feature is non-contiguous.
+
+## [2.1.10] - 2021-11-19
+### Fixed
+- Fixed a bug in utils.PointToVoxel: the cuda stream shouldn't be fetched in cpu code
+
+## [2.1.9] - 2021-11-18
+### Removed
+- Remove a wrong assert
+
 ## [2.1.8] - 2021-11-15
 ### Added
 - Add support for pytorch 1.5

--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -82,6 +82,8 @@ class ExampleNet(nn.Module):

 Inverse sparse convolution means the "inverse" of a sparse convolution. The output of an inverse convolution contains the same indices as the input of the corresponding sparse convolution.

+**WARNING** ```SparseInverseConv``` isn't equivalent to ```SparseConvTranspose```. SparseConvTranspose is equivalent to ```ConvTranspose``` in pytorch, but SparseInverseConv isn't.
+
 Inverse convolution is usually used in semantic segmentation.

 ```Python
@@ -112,8 +114,10 @@ voxel generator in spconv generate indices in **ZYX** order, the params format a

 Generated indices don't include the batch axis; you need to add it yourself.

+See example/voxel_gen.py for examples.
+
 ```Python
-from spconv.pytorch.utils import PointToVoxel
+from spconv.pytorch.utils import PointToVoxel, gather_features_by_pc_voxel_id
 # this generator generate ZYX indices.
 gen = PointToVoxel(
    vsize_xyz=[0.1, 0.1, 0.1], 
@@ -123,5 +127,14 @@ gen = PointToVoxel(
    max_num_points_per_voxel=5)
 pc = np.random.uniform(-10, 10, size=[1000, 3])
 pc_th = torch.from_numpy(pc)
-voxels, coords, num_points_per_voxel = gen(pc_th)
+voxels, coords, num_points_per_voxel = gen(pc_th, empty_mean=True)
+```
+
+If you want to get a label for every point of your pc, you need to use another function to get pc_voxel_id and gather features from the semantic segmentation result:
+```Python
+voxels, coords, num_points_per_voxel, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
+seg_features = YourSegNet(...)
+# if voxel id is invalid (point out of range, or no space left in a voxel)
+# features will be zero.
+point_features = gather_features_by_pc_voxel_id(seg_features, pc_voxel_id)
 ```
\ No newline at end of file
--- a/example/voxel_gen.py
+++ b/example/voxel_gen.py
@@ -16,7 +16,7 @@ import numpy as np

 from cumm import tensorview as tv
 from spconv.utils import Point2VoxelCPU3d
-from spconv.pytorch.utils import PointToVoxel
+from spconv.pytorch.utils import PointToVoxel, gather_features_by_pc_voxel_id
 import torch

 def main_pytorch_voxel_gen():
@@ -52,34 +52,58 @@ def main_pytorch_voxel_gen():
 def main_pytorch_voxel_gen_cuda():
    np.random.seed(50051)
    # voxel gen source code: spconv/csrc/sparse/pointops.py
-    device = torch.device("cuda:0")
-    gen = PointToVoxel(vsize_xyz=[0.1, 0.1, 0.1],
-                       coors_range_xyz=[-80, -80, -6, 80, 80, 6],
+    pc = np.random.uniform(-2, 8, size=[1000, 3]).astype(np.float32)
+
+    for device in [torch.device("cuda:0"), torch.device("cpu:0")]:
+        gen = PointToVoxel(vsize_xyz=[0.25, 0.25, 0.25],
+                        coors_range_xyz=[0, 0, 0, 10, 10, 10],
                        num_point_features=3,
                        max_num_voxels=5000,
                        max_num_points_per_voxel=5,
                        device=device)

-    pc = np.random.uniform(-4, 4, size=[1000, 3]).astype(np.float32)
        pc_th = torch.from_numpy(pc).to(device)
        voxels_th, indices_th, num_p_in_vx_th = gen(pc_th)
        voxels_np = voxels_th.cpu().numpy()
        indices_np = indices_th.cpu().numpy()
        num_p_in_vx_np = num_p_in_vx_th.cpu().numpy()
-    print(f"------Raw Voxels {voxels_np.shape[0]}-------")
+        print(f"------{device} Raw Voxels {voxels_np.shape[0]}-------")
        print(voxels_np[0])
        # run voxel gen and FILL MEAN VALUE to voxel remain
        voxels_tv, indices_tv, num_p_in_vx_tv = gen(pc_th, empty_mean=True)
        voxels_np = voxels_tv.cpu().numpy()
        indices_np = indices_tv.cpu().numpy()
        num_p_in_vx_np = num_p_in_vx_tv.cpu().numpy()
-    print("------Voxels with mean filled-------")
+        print(f"------{device} Voxels with mean filled-------")
        print(voxels_np[0])
        voxels_th, indices_th, num_p_in_vx_th, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
-    print("------Voxel ids for every point-------")
+        print(f"------{device} Reconstruct Indices From Voxel ids for every point-------")
+        indices_th_float = indices_th.float()
+        # we gather indices by voxel_id to see correctness of voxel id.
+        indices_th_voxel_id = gather_features_by_pc_voxel_id(indices_th_float, pc_voxel_id)
+        indices_th_voxel_id_np = indices_th_voxel_id[:10].cpu().numpy()
        print(pc[:10])
-    print(indices_th[pc_voxel_id[:10]])
+        print(indices_th_voxel_id_np[:, ::-1] / 4)
+
+def main_gather_features_by_pc_voxel_id():
+    """Demo: voxelize a random point cloud on cuda:0 and gather per-point features.
+
+    An all-zero (num_voxels, 128) tensor stands in for a segmentation network
+    output; the function prints the point cloud shape and the shape of the
+    features gathered for every point.
+    """
+    np.random.seed(50051)
+    # voxel gen source code: spconv/csrc/sparse/pointops.py
+    device = torch.device("cuda:0")
+    # NOTE(review): max_num_voxels (2000) is below the number of points (5000),
+    # presumably so that some points end up without a valid voxel id - confirm.
+    gen = PointToVoxel(vsize_xyz=[0.25, 0.25, 0.25],
+                       coors_range_xyz=[-10, -10, -10, 10, 10, 10],
+                       num_point_features=3,
+                       max_num_voxels=2000,
+                       max_num_points_per_voxel=5,
+                       device=device)
+
+    pc = np.random.uniform(-8, 8, size=[5000, 3]).astype(np.float32)
+    pc_th = torch.from_numpy(pc).to(device)
+
+    voxels_th, indices_th, num_p_in_vx_th, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
+    # placeholder for per-voxel segmentation features: one 128-wide row per voxel.
+    res_features_from_seg = torch.zeros((voxels_th.shape[0], 128), dtype=torch.float32, device=device)
    
+    pc_features = gather_features_by_pc_voxel_id(res_features_from_seg, pc_voxel_id)
+    print(pc.shape, pc_features.shape)

 def main():
    np.random.seed(50051)
@@ -172,3 +196,4 @@ if __name__ == "__main__":
    if torch.cuda.is_available():
        main_cuda()
        main_pytorch_voxel_gen_cuda()
+        main_gather_features_by_pc_voxel_id()
--- a/spconv/csrc/sparse/pointops.py
+++ b/spconv/csrc/sparse/pointops.py
@@ -445,7 +445,6 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
        int64_t expected_hash_data_num = points.dim(0) * 2;
        TV_ASSERT_RT_ERR(hashdata.dim(0) >= expected_hash_data_num, "hash table too small")
        TV_ASSERT_RT_ERR(point_indice_data.dim(0) >= points.dim(0), "point_indice_data too small")
-        // auto timer = tv::CudaContextTimer<>();
        num_per_voxel.zero_(ctx);
        table_t hash = table_t(hashdata.data_ptr<pair_t>(), expected_hash_data_num);
        hash.clear(custream);
@@ -462,14 +461,12 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
                        layout, voxels.dim(0));
        auto count_cpu = count.cpu();
        int count_val = count_cpu.item<int32_t>();
-        // tv::ssprint("assign_table", timer.report());
-
+        count_val = count_val > voxels.dim(0) ? voxels.dim(0) : count_val;
        launcher(kernel::generate_voxel<table_t>, hash, points.data_ptr<const {self.dtype}>(),
                point_indice_data.data_ptr<const int64_t>(), voxels.data_ptr<{self.dtype}>(),
                num_per_voxel.data_ptr<int>(), points_voxel_id.data_ptr<int64_t>(), points.dim(1), voxels.dim(1), 
                voxels.dim(0), vsize_tv, coors_range_tv,
                grid_size_tv, grid_stride_tv, points.dim(0));
-        // tv::ssprint("generate_voxel", timer.report());
        auto voxel_launcher = tv::cuda::Launch(count_val, custream);
        if (empty_mean){{
            launcher(kernel::voxel_empty_fill_mean, voxels.data_ptr<{self.dtype}>(),

--- a/spconv/pytorch/conv.py
+++ b/spconv/pytorch/conv.py
@@ -124,9 +124,11 @@ class SparseConvolution(SparseModule):
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        kv = int(np.prod(kernel_size))
-        kv_stride = int(np.prod(kernel_size))
-
-        self.conv1x1 = kv == 1 and kv_stride == 1
+        kv_stride = int(np.prod(stride))
+        self.conv1x1 = kv == 1
+        # TODO we should deprecate support for ksize == 1 but stride != 1.
+        if not subm:
+            self.conv1x1 &= kv_stride == 1
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
@@ -296,6 +298,8 @@ class SparseConvolution(SparseModule):
            if self.bias is not None:
                features += self.bias
            out_tensor = out_tensor.replace_feature(features)
+            # padding may change spatial shape of conv 1x1.
+            out_tensor.spatial_shape = out_spatial_shape
            return out_tensor
        indice_dict = input.indice_dict.copy()


--- a/spconv/pytorch/ops.py
+++ b/spconv/pytorch/ops.py
@@ -825,6 +825,9 @@ def indice_conv_backward(features: torch.Tensor,
        filter_shape_per_kv = [out_channel, filters.shape[-1]]

    kv_center = kv // 2
+    # TODO handle this in nn.Module to make sure features in backward is contiguous
+    if not features.is_contiguous():
+        features = features.contiguous()
    if not out_bp.is_contiguous():
        out_bp = out_bp.contiguous()
    assert out_bp.is_contiguous()
@@ -1246,6 +1249,9 @@ def implicit_gemm_backward(features: torch.Tensor,
        raise NotImplementedError("work in progress")
    if not out_bp.is_contiguous():
        out_bp = out_bp.contiguous()
+    if not features.is_contiguous():
+        features = features.contiguous()
+
    assert out_bp.is_contiguous()
    assert filters.is_contiguous()
    assert features.is_contiguous()
@@ -1450,6 +1456,9 @@ def indice_maxpool_backward(features, out_features, out_bp, indice_pairs,
    indice_pair_num_cpu = indice_pair_num.cpu().tolist()
    if not out_bp.is_contiguous():
        out_bp = out_bp.contiguous()
+    if not features.is_contiguous():
+        features = features.contiguous()
+
    out_features_tv = torch_tensor_to_tv(out_features)
    features_tv = torch_tensor_to_tv(features)
    out_bp_tv = torch_tensor_to_tv(out_bp)
@@ -1509,6 +1518,9 @@ def indice_maxpool_implicit_gemm_backward(features, out_features, out_bp,
    assert features.is_cuda
    if not out_bp.is_contiguous():
        out_bp = out_bp.contiguous()
+    if not features.is_contiguous():
+        features = features.contiguous()
+
    stream = get_current_stream()
    out_features_tv = torch_tensor_to_tv(out_features)
    features_tv = torch_tensor_to_tv(features)

--- a/spconv/pytorch/utils.py
+++ b/spconv/pytorch/utils.py
@@ -156,3 +156,17 @@ class PointToVoxel(object):

            return (self.voxels[:num_voxels], self.indices[:num_voxels],
                    self.num_per_voxel[:num_voxels], pc_voxel_id)
+
+
+def gather_features_by_pc_voxel_id(seg_res_features: torch.Tensor, pc_voxel_id: torch.Tensor):
+    """Gather per-voxel segmentation features back onto the original point cloud.
+
+    Args:
+        seg_res_features: per-voxel features, indexed along dim 0 by voxel id.
+            Read as 2-D: ``shape[1]`` is used as the feature width.
+        pc_voxel_id: voxel id for every point; ``-1`` is treated as "no voxel".
+            Assumed to be 1-D (one id per point) - TODO confirm. It is moved to
+            the device of ``seg_res_features`` when the two devices differ.
+
+    Returns:
+        A ``(pc_voxel_id.shape[0], seg_res_features.shape[1])`` tensor with the
+        dtype and device of ``seg_res_features``. Rows of points whose voxel id
+        is ``-1`` stay zero.
+    """
+    if seg_res_features.device != pc_voxel_id.device:
+        pc_voxel_id = pc_voxel_id.to(seg_res_features.device)
+    # start from zeros so points without a voxel keep an all-zero feature row.
+    res = torch.zeros((pc_voxel_id.shape[0], seg_res_features.shape[1]), dtype=seg_res_features.dtype, device=seg_res_features.device)
+    pc_voxel_id_valid = pc_voxel_id != -1
+    # flat indices of the points that carry a voxel id other than -1.
+    pc_voxel_id_valid_ids = torch.nonzero(pc_voxel_id_valid).view(-1)
+    # look up the voxel row of each such point, then write it to that point's slot.
+    seg_res_features_valid = seg_res_features[pc_voxel_id[pc_voxel_id_valid_ids]]
+    res[pc_voxel_id_valid_ids] = seg_res_features_valid
+    return res 
+