fix wrong int8 dtype

a31b131f · yan.yan · 34e97911 · a31b131f · a31b131f · a31b131f
Commit a31b131f authored Sep 23, 2022 by yan.yan
Show whitespace changes
Inline Side-by-side

Showing with 33 additions and 13 deletions

spconv/core.py +8 -8

spconv/core_cc/cumm/common.pyi +9 -0

test/benchmark.py +16 -5

No files found.
--- a/spconv/core.py
+++ b/spconv/core.py
@@ -39,17 +39,17 @@ class AlgoHint(Enum):
 # TODO two step build: build gemm kernels first, then bind for every python
 SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [
-    *gen_shuffle_params((64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((64, 128, 32), (32, 64, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.SimtDP4A, None),
-    *gen_shuffle_params((128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((128, 64, 32), (64, 32, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.SimtDP4A, None),
-    *gen_shuffle_params((128, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"],
+    *gen_shuffle_params((128, 128, 32), (32, 64, 32), ["s8,s8,s8,s32,s32"],
                        "", 2, kernel.GemmAlgo.SimtDP4A, None),
    *gen_shuffle_params(
        (128, 128, 32),
-        (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], "", 2,
+        (64, 32, 32), ["s8,s8,s8,s32,s32"], "", 2,
        kernel.GemmAlgo.SimtDP4A, None),
-    *gen_shuffle_params((64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((64, 64, 32), (32, 32, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.SimtDP4A, None),
    *gen_shuffle_params((64, 256, 8), (32, 64, 8), ["f32,f32,f32,f32,f32"],
                        "f32,f32,f32,f32,f32", 2, kernel.GemmAlgo.Simt, None),
@@ -164,7 +164,7 @@ SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [
        (64, 128, 32),
        (32, 64, 32), ["f16,f16,f16,f16,f16"], "f16,f16,f16,f32,f32", 2,
        kernel.GemmAlgo.Turing, TensorOp((16, 8, 8))),
-    *gen_shuffle_params((64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((64, 64, 32), (32, 32, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.Turing, TensorOp((8, 8, 16))),
    *gen_shuffle_params(
        (128, 128, 32),
@@ -182,9 +182,9 @@ SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [
        (256, 128, 32),
        (64, 64, 32), ["s8,s8,s8,s32,s32"], "", 2, kernel.GemmAlgo.Turing,
        TensorOp((8, 8, 16))),
-    *gen_shuffle_params((128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((128, 64, 32), (64, 32, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.Turing, TensorOp((8, 8, 16))),
-    *gen_shuffle_params((64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], "",
+    *gen_shuffle_params((64, 128, 32), (32, 64, 32), ["s8,s8,s8,s32,s32"], "",
                        2, kernel.GemmAlgo.Turing, TensorOp((8, 8, 16))),
 ]

--- a/spconv/core_cc/cumm/common.pyi
+++ b/spconv/core_cc/cumm/common.pyi
@@ -4,9 +4,18 @@ class CompileInfo:
    @staticmethod
    def get_compiled_cuda_arch() -> List[Tuple[int, int]]: ...
    @staticmethod
+    def get_compiled_gemm_cuda_arch() -> List[Tuple[int, int]]: ...
+    @staticmethod
    def arch_is_compiled(arch: Tuple[int, int]) -> bool: 
        """
        Args:
            arch: 
        """
        ...
+    @staticmethod
+    def arch_is_compiled_gemm(arch: Tuple[int, int]) -> bool: 
+        """
+        Args:
+            arch: 
+        """
+        ...
--- a/test/benchmark.py
+++ b/test/benchmark.py
@@ -57,8 +57,14 @@ def waymo_data_large(batch_size=1):
    pc4[:, 1] += 3
    pc5 = pc.copy()
    pc5[:, 1] += 4
+    pc6 = pc.copy()
-    pc = np.concatenate([pc, pc2, pc3, pc4, pc5])
+    pc6[:, 1] += 5
+    pc7 = pc.copy()
+    pc7[:, 1] += 6
+    pc8 = pc.copy()
+    pc8[:, 1] += 7
+    pc = np.concatenate([pc, pc2, pc3, pc4, pc5, pc6, pc7, pc8])
    print(pc.shape)
    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
@@ -402,7 +408,7 @@ def main():
    # MaskImpGemm: 51.0ms
    # MaskSplitImpGemm: 41.1ms
    # algo = None
-    net = NetSm(spatial_shape, algo).to(device).eval().to(dtype)# .train()
+    net = Net(spatial_shape, algo).to(device).eval().to(dtype)# .train()
    # net.load_state_dict(net.state_dict())
    spconv.assign_name_for_sparse_modules(net)
    print(coors_th.shape)
@@ -427,12 +433,17 @@ def main():
                items = list(timer.get_all_pair_time().items())
                items.sort(key=lambda x: x[0])
                print("SUM TIME:",  sum([x[1] for x in items]))
-                print(json.dumps(dict(items), indent=2))
+                # print(json.dumps(dict(items), indent=2))
                inds_sum = 0
+                gemm_sum = 0
                for k, v in items:
                    if "gen_pairs" in k:
                        inds_sum += v 
-                print("SUM GEN INDS:",  inds_sum)
+                for k, v in items:
+                    if "gemm" in k:
+                        gemm_sum += v 
+                print("SUM GEN INDS:",  inds_sum, "GEMM:", gemm_sum)
    # state = net.state_dict()
    # state.pop("net.2.max_num_voxels_during_training")