v2.2.3: fix contiguous, add msg if point vanish

8b52b3a9 · yan.yan · 1661828b · 8b52b3a9 · 8b52b3a9 · 8b52b3a9
Commit 8b52b3a9 authored Sep 28, 2022 by yan.yan
7 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
 # Changelog
+## [2.2.3] - 2022-9-28
+### Fixed 
+- Fix missing .contiguous for input feature
+- Add some debug msg if points vanished.
+
 ## [2.2.2] - 2022-9-25
 ### Fixed 
 - Fix CI problem: main function too long and cause OOM in CI vm.

--- a/docs/COMMON_PROBLEMS.md
+++ b/docs/COMMON_PROBLEMS.md
@@ -27,7 +27,7 @@ Your coordinates generate nothing with some conv params. Modify your conv params
 Example:

 Conv Params:
-```spatial shape=[8, 200, 200],ksize=[3, 3, 3],stride=[2, 2, 2],padding=[0, 1, 1],dilation=[1, 1, 1]```
+```spatial shape=[8, 200, 200],ksize=3,stride=2,padding=[0, 1, 1],dilation=1```
 Coordinates:
 ```
 [[0, 7, 153, 142]]

--- a/example/libspconv/README.md
+++ b/example/libspconv/README.md
@@ -6,6 +6,8 @@ libspconv + pybindings = "core_cc.so" in spconv python package.

 run ```run_build.sh``` to get ```libspconv.so```.

+see [inference code example](main.cu)
+
 ## libspconv API

 currently not available, but you can check python code to understand how to use C++ apis, spconv python and libspconv use same c++ code.
\ No newline at end of file
--- a/spconv/csrc/sparse/all.py
+++ b/spconv/csrc/sparse/all.py
@@ -1856,7 +1856,28 @@ class SpconvOps(pccm.Class):
                }}
            }}
            // tv::ssprint("HASH SIZE", hash_size, num_act_out);
-
+            if (num_act_out == 0){{
+                std::stringstream ss;
+                ss << R"(Your points vanished here, this usually because you provide 
+conv params that may ignore some input points. Example: 
+    spatial_shape=[8, 200, 200]
+    ksize=3
+    stride=2
+    padding=[0, 1, 1]
+    dilation=1
+    Coordinates=[[0, 7, 153, 142]]
+these params will cause ALL points in z == 7 dropped because of padding_z=0.
+enlarge your spatial shape or change your conv param to make sure 
+every input point has a corresponding output point.
+Your Conv Params: )" << "\\n";
+                tv::sstream_print<'\\0'>(ss, "    spatial_shape=", input_dims, "\\n");
+                tv::sstream_print<'\\0'>(ss, "    ksize=", ksize, "\\n");
+                tv::sstream_print<'\\0'>(ss, "    stride=", stride, "\\n");
+                tv::sstream_print<'\\0'>(ss, "    padding=", padding, "\\n");
+                tv::sstream_print<'\\0'>(ss, "    dilation=", dilation, "\\n");
+                tv::ssprint(ss.str());
+                throw std::runtime_error(ss.str());
+            }}
            if (num_out_act_bound > 0 && num_act_out > num_out_act_bound){{
                num_act_out = num_out_act_bound;
            }}
@@ -2092,6 +2113,29 @@ class SpconvOps(pccm.Class):

                    // TODO pytorch unique may be faster?
                    num_act_out = apply_thrust_unique_to_indice_pairs_uniq(indice_pairs_uniq, thrustalloc, stream_int);
+                    if (num_act_out == 0){{
+                        std::stringstream ss;
+                        ss << R"(Your points vanished here, this usually because you provide 
+conv params that may ignore some input points. Example: 
+    spatial_shape=[8, 200, 200]
+    ksize=3
+    stride=2
+    padding=[0, 1, 1]
+    dilation=1
+    Coordinates=[[0, 7, 153, 142]]
+these params will cause ALL points in z == 7 dropped because of padding_z=0.
+enlarge your spatial shape or change your conv param to make sure 
+every input point has a corresponding output point.
+Your Conv Params: )" << "\\n";
+                        tv::sstream_print<'\\0'>(ss, "    spatial_shape=", input_dims, "\\n");
+                        tv::sstream_print<'\\0'>(ss, "    ksize=", ksize, "\\n");
+                        tv::sstream_print<'\\0'>(ss, "    stride=", stride, "\\n");
+                        tv::sstream_print<'\\0'>(ss, "    padding=", padding, "\\n");
+                        tv::sstream_print<'\\0'>(ss, "    dilation=", dilation, "\\n");
+                        tv::ssprint(ss.str());
+                        throw std::runtime_error(ss.str());
+                    }}
+
                    bool use_bound_algo = false;
                    int64_t num_out_bounded = num_act_out;
                    if (num_out_act_bound > 0 && num_act_out > num_out_act_bound){{

--- a/spconv/pytorch/conv.py
+++ b/spconv/pytorch/conv.py
@@ -327,7 +327,9 @@ class SparseConvolution(SparseModule):
            out_tensor.spatial_shape = out_spatial_shape
            return out_tensor
        indice_dict = input.indice_dict.copy()
-
+        # only support contiguous tensor for now
+        if not features.is_contiguous():
+            features = features.contiguous()
        algo = self.algo
        if self.indice_key is not None:
            datas = input.find_indice_pair(self.indice_key)

--- a/spconv/pytorch/ops.py
+++ b/spconv/pytorch/ops.py
@@ -51,6 +51,24 @@ DEBUG = False
 DEBUG_INT64_HASH_K = False
 INT32_MAX = SpconvOps.get_int32_max()

+_POINT_VANISH_MSG = """Your points vanished here, this usually because you provide 
+conv params that may ignore some input points. Example: 
+    spatial_shape=[8, 200, 200]
+    ksize=3
+    stride=2
+    padding=[0, 1, 1]
+    dilation=1
+    Coordinates=[[0, 7, 153, 142]]
+these params will cause ALL points in z == 7 dropped because of padding_z=0.
+enlarge your spatial shape or change your conv param to make sure 
+every input point has a corresponding output point.
+Your Conv Params: 
+    spatial_shape={}
+    ksize={}
+    stride={}
+    padding={}
+    dilation={}"""
+

 def get_conv_output_size(input_size, kernel_size, stride, padding, dilation):
    ndim = len(input_size)
@@ -239,6 +257,9 @@ def get_indice_pairs(indices: torch.Tensor,
                                                stream_int=stream)
            uniq_res = indice_pairs_uniq.unique()
            num_act_out = uniq_res.shape[0] - 1
+            if (num_act_out == 0):
+                msg = _POINT_VANISH_MSG.format(spatial_shape, ksize, stride, padding, dilation)
+                raise ValueError(msg)
            use_bound_algo = False
            if num_out_act_bound > 0 and num_act_out > num_out_act_bound:
                num_act_out = num_out_act_bound
@@ -611,6 +632,9 @@ def get_indice_pairs_implicit_gemm(
                num_act_out = uniq_res.shape[0] - 1
                uniq_out_indices_offset_tv = torch_tensor_to_tv(uniq_res)
                raw_out_indices_offset_tv = indice_pairs_uniq_tv
+            if (num_act_out == 0):
+                msg = _POINT_VANISH_MSG.format(spatial_shape, ksize, stride, padding, dilation)
+                raise ValueError(msg)

        if num_out_act_bound > 0 and num_act_out > num_out_act_bound:
            num_act_out = num_out_act_bound
@@ -1080,6 +1104,15 @@ def indice_conv_backward(features: torch.Tensor,
                         algo: ConvAlgo = ConvAlgo.Native,
                         timer: CUDAKernelTimer = CUDAKernelTimer(False)):
    # print(out_bp.mean(), out_bp.max(), out_bp.min())
+    filters_shape = filters.shape
+    # TODO handle this in nn.Module to make sure features in backward is contiguous
+    if not features.is_contiguous():
+        features = features.contiguous()
+    if not out_bp.is_contiguous():
+        out_bp = out_bp.contiguous()
+    assert out_bp.is_contiguous()
+    assert filters.is_contiguous()
+    assert features.is_contiguous()

    if SPCONV_CPP_GEMM and GEMM_CPP is not None:
        alloc = TorchAllocator(features.device)
@@ -1110,16 +1143,6 @@ def indice_conv_backward(features: torch.Tensor,
        df = alloc.allocated[AllocKeys.DFilters]
        return din, df

-    filters_shape = filters.shape
-    # TODO handle this in nn.Module to make sure features in backward is contiguous
-    if not features.is_contiguous():
-        features = features.contiguous()
-    if not out_bp.is_contiguous():
-        out_bp = out_bp.contiguous()
-    assert out_bp.is_contiguous()
-    assert filters.is_contiguous()
-    assert features.is_contiguous()
-
    if not ALL_WEIGHT_IS_KRSC:
        kv_dim = 0
        is_KC_not_CK = not FILTER_HWIO
@@ -1438,6 +1461,10 @@ def implicit_gemm(features: torch.Tensor,
    if bias is not None:
        bias_tv = torch_tensor_to_tv(bias)

+    if not features.is_contiguous():
+        features = features.contiguous()
+    assert features.is_contiguous()
+    assert filters.is_contiguous()

    if SPCONV_CPP_GEMM and CONV_CPP is not None:
        alloc = TorchAllocator(features.device)
@@ -1477,10 +1504,6 @@ def implicit_gemm(features: torch.Tensor,
    # CONV.stream_synchronize(stream)

    # t = time.time()
-    if not features.is_contiguous():
-        features = features.contiguous()
-    assert features.is_contiguous()
-    assert filters.is_contiguous()

    if features.dtype == torch.int8 or features.dtype == torch.qint8:
        raise NotImplementedError("work in progress")

--- a/version.txt
+++ b/version.txt
-2.2.2
+2.2.3