v2.1

eae6a3bd · yan.yan · fa995a4f · eae6a3bd · fa995a4f · eae6a3bd
Commit eae6a3bd authored Nov 07, 2021 by yan.yan
20 changed files
--- a/spconv/core_cc/cumm/conv/main.pyi
+++ b/spconv/core_cc/cumm/conv/main.pyi
+from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
+from pccm.stubs import EnumValue, EnumClassValue
+from ...cumm.gemm.main import GemmAlgoDesp
+from cumm.tensorview import Tensor
+class ConvAlgoDesp(GemmAlgoDesp):
+    """Type stub for the convolution algorithm descriptor.
+
+    Subclasses ``GemmAlgoDesp`` and declares the convolution-specific
+    attributes, static index-mapping helpers, dtype properties and
+    support/workspace queries listed below.
+
+    NOTE(review): the ``_i`` / ``_w`` / ``_o`` suffixes presumably stand for
+    input / weight / output (cf. ``dtype_input`` / ``dtype_weight`` /
+    ``dtype_output``) -- confirm against the C++ implementation.
+    """
+    ndim: int
+    op_type: int
+    iter_algo: int
+    layout_i: int
+    layout_w: int
+    layout_o: int
+    interleave_i: int
+    interleave_w: int
+    interleave_o: int
+    mask_sparse: bool
+    increment_k_first: bool
+    def __init__(self, ndim: int, op_type: int) -> None: 
+        """
+        Args:
+            ndim: 
+            op_type: 
+        """
+        ...
+    def __repr__(self) -> str: ...
+    @staticmethod
+    def conv_iwo_012_to_abc(op_type: int) -> List[int]: 
+        """
+        Args:
+            op_type: 
+        """
+        ...
+    @staticmethod
+    def gemm_abc_012_to_iwo(op_type: int) -> List[int]: 
+        """
+        Args:
+            op_type: 
+        """
+        ...
+    @property
+    def dtype_input(self) -> int: ...
+    @property
+    def dtype_weight(self) -> int: ...
+    @property
+    def dtype_output(self) -> int: ...
+    def supported(self, m: int, n: int, k: int, C: int, K: int, mask_width: int) -> bool: 
+        """
+        Args:
+            m: 
+            n: 
+            k: 
+            C: 
+            K: 
+            mask_width: 
+        """
+        ...
+    def query_conv_workspace_size(self, m: int, n: int, k: int, split_k_slices: int, kv: int) -> int: 
+        """
+        Args:
+            m: 
+            n: 
+            k: 
+            split_k_slices: 
+            kv: 
+        """
+        ...
+    def supported_ldx_conv(self, ldi: int, ldw: int, ldo: int) -> bool: 
+        """
+        Args:
+            ldi: 
+            ldw: 
+            ldo: 
+        """
+        ...
+class ConvParams:
+    """Type stub for the argument bundle taken by
+    ``ConvMainUnitTest.implicit_gemm2``.
+
+    ``workspace``, ``mask``, ``mask_argsort``, ``indices`` and
+    ``mask_output`` are declared with a default of ``Tensor()``; every other
+    field is declared without a default. The constructor takes only ``ndim``
+    and ``op_type``.
+    """
+    # Typed ``Any`` in this stub; presumably a ConvAlgoDesp -- TODO confirm.
+    conv_algo_desp: Any
+    input: Tensor
+    weight: Tensor
+    output: Tensor
+    split_k_slices: int
+    padding: List[int]
+    stride: List[int]
+    dilation: List[int]
+    alpha: float
+    beta: float
+    mask_width: int
+    mask_filter: int
+    reverse_mask: bool
+    verbose: bool
+    workspace: Tensor =  Tensor()
+    mask: Tensor =  Tensor()
+    mask_argsort: Tensor =  Tensor()
+    indices: Tensor =  Tensor()
+    mask_output: Tensor =  Tensor()
+    stream: int
+    def __init__(self, ndim: int, op_type: int) -> None: 
+        """
+        Args:
+            ndim: 
+            op_type: 
+        """
+        ...
+class ConvMainUnitTest:
+    """Type stub exposing three static entry points.
+
+    * ``extract_mnk`` returns a ``List[int]`` computed from the convolution
+      sizes passed in.
+    * ``implicit_gemm2`` takes a ``ConvParams`` and returns ``None``.
+    * ``get_all_conv_algo_desp`` returns a list of ``ConvAlgoDesp``.
+    """
+    @staticmethod
+    def extract_mnk(op_type: int, N: int, C: int, K: int, kernel_volume: int, in_prod: int, out_prod: int, mask_sparse: bool) -> List[int]: 
+        """
+        Args:
+            op_type: 
+            N: 
+            C: 
+            K: 
+            kernel_volume: 
+            in_prod: 
+            out_prod: 
+            mask_sparse: 
+        """
+        ...
+    @staticmethod
+    def implicit_gemm2(params: ConvParams) -> None: 
+        """
+        Args:
+            params: 
+        """
+        ...
+    @staticmethod
+    def get_all_conv_algo_desp() -> List[ConvAlgoDesp]: ...
--- a/spconv/core_cc/cumm/gemm/gather.pyi
+++ b/spconv/core_cc/cumm/gemm/gather.pyi
-from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
-from pccm.stubs import EnumValue, EnumClassValue
-from cumm.tensorview import Tensor
-class ScatterAll:
-    def __init__(self) -> None: ...
-    @staticmethod
-    def get_all_scatter_params() -> List[Tuple[int, int, int, int]]: ...
-    def supported_scatter(self, tile_m: int, tile_k_bytes: int, bytes_per_access: int, num_threads: int, channel_size: int, dtype: int) -> bool: 
-        """
-        Args:
-            tile_m: 
-            tile_k_bytes: 
-            bytes_per_access: 
-            num_threads: 
-            channel_size: 
-            dtype: 
-        """
-        ...
-    @staticmethod
-    def stream_synchronize(stream: int = 0) -> None: 
-        """
-        Args:
-            stream: 
-        """
-        ...
-    def scatter(self, output: Tensor, input: Tensor, indices: Tensor, tile_m: int, tile_k_bytes: int, bytes_per_access: int, num_threads: int, stream: int = 0) -> None: 
-        """
-        Args:
-            output: 
-            input: 
-            indices: 
-            tile_m: 
-            tile_k_bytes: 
-            bytes_per_access: 
-            num_threads: 
-            stream: 
-        """
-        ...
-    def scatter2(self, output: Tensor, input: Tensor, indices: Tensor, size: int, stream: int = 0) -> None: 
-        """
-        Args:
-            output: 
-            input: 
-            indices: 
-            size: 
-            stream: 
-        """
-        ...
-class GatherAll:
-    def __init__(self) -> None: ...
-    @staticmethod
-    def get_all_gather_params() -> List[Tuple[int, int, int, int]]: ...
-    @staticmethod
-    def supported(bytes_per_access: int, channel_size: int, dtype: int) -> bool: 
-        """
-        Args:
-            bytes_per_access: 
-            channel_size: 
-            dtype: 
-        """
-        ...
-    @staticmethod
-    def stream_synchronize(stream: int = 0) -> None: 
-        """
-        Args:
-            stream: 
-        """
-        ...
-    def gather(self, output: Tensor, input: Tensor, indices: Tensor, tile_m: int, tile_k_bytes: int, bytes_per_access: int, num_threads: int, stream: int = 0) -> None: 
-        """
-        Args:
-            output: 
-            input: 
-            indices: 
-            tile_m: 
-            tile_k_bytes: 
-            bytes_per_access: 
-            num_threads: 
-            stream: 
-        """
-        ...
-    def gather2(self, output: Tensor, input: Tensor, indices: Tensor, size: int, stream: int = 0) -> None: 
-        """
-        Args:
-            output: 
-            input: 
-            indices: 
-            size: 
-            stream: 
-        """
-        ...
--- a/spconv/core_cc/cumm/gemm/main.pyi
+++ b/spconv/core_cc/cumm/gemm/main.pyi
@@ -18,6 +18,7 @@ class GemmAlgoDesp:
    element_per_access_a: int
    element_per_access_b: int
    element_per_access_c: int
+    access_per_vector: int
    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
    @property

--- a/spconv/csrc/sparse/all.py
+++ b/spconv/csrc/sparse/all.py
--- a/spconv/csrc/sparse/devleop/wtf.py
+++ b/spconv/csrc/sparse/devleop/wtf.py
-#!/home/yy/library/anaconda3/bin/python
-import sys
-from pathlib import Path 
-import ctypes
-# _cudart = ctypes.CDLL('libcudart.so')
-
-print(str(Path(__file__).parent.parent.parent.parent))
-sys.path.append(str(Path(__file__).parent.parent.parent.parent))
-
-
-
-from spconv import tensorview as tv 
-
-from spconv.sparse import build
-import numpy as np 
-from pathlib import Path 
-from spconv.spconv_ops_cc.sparse.all.ops import Point2Voxel
-from spconv.spconv_ops_cc.sparse.all import SpconvOps
-
-import time 
-
-def main():
-    data = np.load("/home/yy/OneDrive/dev/spconv/test/data/benchmark-pc.npz")["pc"].astype(np.float32)
-    print(data.shape, data.dtype)
-    p2v = Point2Voxel([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3, 150000, 1)
-    gs = p2v.grid_size # zyx
-    print(gs)
-    # return
-    data_tv = tv.from_numpy(data).cuda()
-    for i in range(6):
-        t = time.time()
-
-        voxels, indices, num_per_voxel = p2v.point_to_voxel_hash(data_tv)   
-        
-        print(time.time() - t)
-    voxels, indices, num_per_voxel = p2v.point_to_voxel_hash(data_tv)   
-    print(voxels.shape, gs)
-    gs_xyz = gs
-    indices_np = indices.cpu().numpy()
-    # indices_offset = indices_np[:, 0] * gs_xyz[1] * gs_xyz[2] + indices_np[:, 1] * gs_xyz[2] + indices_np[:, 2]
-    # uq = np.unique(indices_offset)
-    # print(uq.shape, indices_offset.shape, gs_xyz)
-    # return 
-    ksize = [3] * 3 
-    kv = int(np.prod(ksize))
-    indices_with_bs = np.zeros((indices_np.shape[0], 4), dtype=np.int32)
-    indices_with_bs[:, 1:] = indices_np
-    print(indices_with_bs.mean(), indices_with_bs.max(), indices_with_bs.min())
-
-    indices = tv.from_numpy(indices_with_bs).cuda()
-    out_indices = tv.zeros([indices.dim(0) * kv, 4], tv.int32, 0)
-    indice_num_per_loc = tv.zeros([kv], tv.int32, 0)
-
-
-    points = voxels.view([-1, 3])
-    hashdata = tv.zeros([points.dim(0) * kv * 2], tv.custom64, 0)
-    hashdata_subm = tv.zeros([points.dim(0) * 2], tv.custom64, 0)
-
-    indice_pairs = tv.full([2, kv, indices.dim(0)], -1, tv.int32, 0)
-    indice_pairs_uniq = tv.zeros([indice_pairs.size // 2 + 1], tv.int32, 0)
-
-    # for i in range(10):
-    #     indice_pairs.fill_int_(-1)
-    #     np.random.shuffle(indices_with_bs)
-    #     indices = tv.from_numpy(indices_with_bs).cuda()
-
-    #     indice_num_per_loc.zero_()
-    #     out_act = SpconvOps.generate_conv_inds(indices, hashdata, indice_pairs,
-    #         indice_pairs_uniq, out_indices, indice_num_per_loc, 
-    #         1, gs, gs, [3, 3, 3], [1, 1, 1], [1, 1, 1], [1, 1, 1])
-    #     indice_num_per_loc.zero_()
-    #     out_act = SpconvOps.generate_subm_conv_inds(indices, hashdata_subm, indice_pairs,
-    #         out_indices, indice_num_per_loc, 
-    #         1, gs, ksize, [1, 1, 1])
-    #     indice_num_per_loc_cpu = indice_num_per_loc.cpu().numpy()
-    #     indice_pairs_cpu = indice_pairs.cpu().numpy()
-    #     indice_pairs_cpu_flat = indice_pairs_cpu.reshape(-1)
-    #     uq, count = np.unique(indice_pairs_cpu_flat, return_counts=True)
-    #     print(out_act, indice_pairs_cpu.shape, indice_pairs_cpu.mean(), indice_num_per_loc_cpu.tolist())
-    #     print(indice_pairs_cpu[:, 13, :2])
-    #     print(uq, count)
-
-if __name__ == "__main__":
-
-    main()
\ No newline at end of file
--- a/spconv/csrc/sparse/gather.py
+++ b/spconv/csrc/sparse/gather.py
+# Copyright 2021 Yan Yan
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pccm 
+from cumm.common import TensorView
+from typing import List 
+
+class GatherCPU(pccm.Class):
+    """pccm class that emits CPU ``gather`` and ``scatter_add`` functions.
+
+    Both generated functions take three ``tv::Tensor`` arguments named
+    ``out``, ``in`` and ``inds``, dispatch on ``out.dtype()`` over ``float``
+    and ``double``, and read ``inds`` as ``int`` data.
+    """
+    def __init__(self):
+        super().__init__()
+        # Presumably supplies the tv:: API used by the generated code below.
+        self.add_dependency(TensorView)
+    
+    @pccm.static_function
+    def gather(self):
+        """Build the code of the static function ``gather(out, in, inds)``.
+
+        With ``channel = in.dim(1)``, the generated loop runs ``i`` over
+        ``[0, inds.dim(0))`` and memcpy's ``channel`` elements from element
+        offset ``inds[i] * channel`` of ``in`` to element offset
+        ``i * channel`` of ``out``.
+
+        NOTE(review): the ``tv::check_shape`` line is commented out and the
+        generated code does not range-check the index values.
+
+        Returns:
+            The populated ``pccm.FunctionCode``.
+        """
+        code = pccm.FunctionCode()
+        code.arg("out", "tv::Tensor")
+        code.arg("in", "tv::Tensor")
+        code.arg("inds", "tv::Tensor")
+
+        # f-string: doubled braces below are literal C++ braces.
+        code.raw(f"""
+        // tv::check_shape(inds, {{out.dim(0)}});
+
+        auto nhot = inds.dim(0);
+        int channel = in.dim(1);
+        tv::dispatch<float, double>(out.dtype(), [&](auto I){{
+            auto indices_data = inds.data_ptr<const int>();
+
+            using T = TV_DECLTYPE(I);
+            T *buffer_data = out.data_ptr<T>();
+            const T *features_data = in.data_ptr<const T>();
+            for (int i = 0; i < nhot; ++i) {{
+                std::memcpy(buffer_data + i * channel,
+                            features_data + indices_data[i] * channel,
+                            sizeof(T) * channel);
+            }}
+        }});
+        """)
+        return code
+
+    @pccm.static_function
+    def scatter_add(self):
+        """Build the code of the static function ``scatter_add(out, in, inds)``.
+
+        With ``channel = in.dim(1)``, the generated loop runs ``i`` over
+        ``[0, inds.dim(0))`` and adds the ``channel`` elements at element
+        offset ``i * channel`` of ``in`` onto the elements at element offset
+        ``inds[i] * channel`` of ``out``.
+
+        NOTE(review): the ``tv::check_shape`` line is commented out and the
+        generated code does not range-check the index values.
+
+        Returns:
+            The populated ``pccm.FunctionCode``.
+        """
+        code = pccm.FunctionCode()
+        code.arg("out", "tv::Tensor")
+        code.arg("in", "tv::Tensor")
+        code.arg("inds", "tv::Tensor")
+        # f-string: doubled braces below are literal C++ braces.
+        code.raw(f"""
+        // tv::check_shape(inds, {{in.dim(0)}});
+        auto nhot = inds.dim(0);
+        int channel = in.dim(1);
+        tv::dispatch<float, double>(out.dtype(), [&](auto I){{
+            using T = TV_DECLTYPE(I);
+            auto indices_data = inds.data_ptr<const int>();
+            const T *buffer_data = in.data_ptr<const T>();
+            T *features_data = out.data_ptr<T>();
+            const T *buf = in.data_ptr<const T>();
+            T *out_ptr = out.data_ptr<T>();
+            for (int i = 0; i < nhot; ++i) {{
+                buf = buffer_data + i * channel;
+                out_ptr = features_data + indices_data[i] * channel;
+                for (int j = 0; j < channel; ++j) {{
+                    out_ptr[j] = out_ptr[j] + buf[j];
+                }}
+            }}
+        }});
+        """)
+        return code
--- a/spconv/csrc/sparse/indices.py
+++ b/spconv/csrc/sparse/indices.py
--- a/spconv/csrc/sparse/maxpool.py
+++ b/spconv/csrc/sparse/maxpool.py
--- a/spconv/csrc/sparse/pointops.py
+++ b/spconv/csrc/sparse/pointops.py
--- a/spconv/pytorch/conv.py
+++ b/spconv/pytorch/conv.py
--- a/spconv/pytorch/core.py
+++ b/spconv/pytorch/core.py
--- a/spconv/pytorch/functional.py
+++ b/spconv/pytorch/functional.py
--- a/spconv/pytorch/ops.py
+++ b/spconv/pytorch/ops.py
--- a/spconv/pytorch/pool.py
+++ b/spconv/pytorch/pool.py
--- a/spconv/pytorch/tables.py
+++ b/spconv/pytorch/tables.py
@@ -18,15 +18,17 @@ from torch.autograd import Function
 import spconv.pytorch as spconv
 #from torch.nn import Module
 from spconv.pytorch.modules import SparseModule
-
+from spconv.pytorch.core import SparseConvTensor
+from typing import List 

 class JoinTable(SparseModule):  # Module):
-    def forward(self, input):
+    def forward(self, input: List[SparseConvTensor]):
+        """Concatenate the features of all inputs along dim 1.
+
+        Args:
+            input: sparse tensors to join; must be non-empty.
+
+        Returns:
+            A new SparseConvTensor whose indices, spatial_shape, batch_size,
+            grid, voxel_num, indice_dict, benchmark_record and
+            thrust_allocator are all taken from ``input[0]``.
+        """
        output = spconv.SparseConvTensor(
-            torch.cat([i.features for i in input], 1), input[1].indices,
-            input[1].spatial_shape, input[0].batch_size)
-        output.indice_dict = input[1].indice_dict
-        output.grid = input[1].grid
+            torch.cat([i.features for i in input], 1), input[0].indices,
+            input[0].spatial_shape, input[0].batch_size, input[0].grid, input[0].voxel_num,
+            input[0].indice_dict)
+        # Read from input[0] like every other field, so a one-element list
+        # does not raise IndexError.
+        output.benchmark_record = input[0].benchmark_record
+        output.thrust_allocator = input[0].thrust_allocator
        return output

    def input_spatial_size(self, out_size):
@@ -34,14 +36,13 @@ class JoinTable(SparseModule):  # Module):


 class AddTable(SparseModule):  # Module):
-    def forward(self, input):
-        output = spconv.SparseConvTensor(sum([i.features for i in input]),
-                                         input[1].indices,
-                                         input[1].spatial_shape,
-                                         input[1].batch_size)
-        output.indice_dict = input[1].indice_dict
-        output.grid = input[1].grid
-
+    def forward(self, input: List[SparseConvTensor]):
+        """Sum the features of all inputs element-wise.
+
+        Args:
+            input: sparse tensors to add; must be non-empty.
+
+        Returns:
+            A new SparseConvTensor whose indices, spatial_shape, batch_size,
+            grid, voxel_num, indice_dict, benchmark_record and
+            thrust_allocator are all taken from ``input[0]``.
+        """
+        output = spconv.SparseConvTensor(
+            sum([i.features for i in input]), input[0].indices,
+            input[0].spatial_shape, input[0].batch_size, input[0].grid, input[0].voxel_num,
+            input[0].indice_dict)
+        # Read from input[0] like every other field, so a one-element list
+        # does not raise IndexError.
+        output.benchmark_record = input[0].benchmark_record
+        output.thrust_allocator = input[0].thrust_allocator
        return output

    def input_spatial_size(self, out_size):

--- a/spconv/pytorch/utils.py
+++ b/spconv/pytorch/utils.py
--- a/spconv/utils/__init__.py
+++ b/spconv/utils/__init__.py
--- a/test/aaa.py
+++ b/test/aaa.py
--- a/test/benchmark.py
+++ b/test/benchmark.py
--- a/test/test_conv.py
+++ b/test/test_conv.py