Commit dc61940b authored by yan.yan
Browse files

debug win CI: only compile one kernel to speed up

parent 520065c6
[build-system] [build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.11", "cumm>=0.1.6"] requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.11", "cumm>=0.1.7"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
...@@ -143,7 +143,9 @@ if disable_jit is not None and disable_jit == "1": ...@@ -143,7 +143,9 @@ if disable_jit is not None and disable_jit == "1":
'upload': UploadCommand, 'upload': UploadCommand,
'build_ext': PCCMBuild, 'build_ext': PCCMBuild,
} }
from cumm.gemm.main import GemmMainUnitTest, SHUFFLE_SIMT_PARAMS, SHUFFLE_VOLTA_PARAMS, SHUFFLE_TURING_PARAMS from cumm.gemm.main import GemmMainUnitTest
from spconv.core import SHUFFLE_SIMT_PARAMS, SHUFFLE_VOLTA_PARAMS, SHUFFLE_TURING_PARAMS
from spconv.csrc.sparse.all import SpconvOps from spconv.csrc.sparse.all import SpconvOps
cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS) cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS)
...@@ -154,7 +156,7 @@ if disable_jit is not None and disable_jit == "1": ...@@ -154,7 +156,7 @@ if disable_jit is not None and disable_jit == "1":
else: else:
std = "c++17" std = "c++17"
ext_modules: List[Extension] = [ ext_modules: List[Extension] = [
PCCMExtension([cu, SpconvOps()], PCCMExtension([cu],
"spconv/core_cc", "spconv/core_cc",
Path(__file__).resolve().parent / "spconv", Path(__file__).resolve().parent / "spconv",
objects_folder="objects", objects_folder="objects",
......
...@@ -32,16 +32,16 @@ class AlgoHint(Enum): ...@@ -32,16 +32,16 @@ class AlgoHint(Enum):
# TODO two step build: build gemm kernels first, then bind for every python # TODO two step build: build gemm kernels first, then bind for every python
SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [
*gen_shuffle_params( # *gen_shuffle_params(
(64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], # (64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.SimtDP4A, None), # 2, kernel.GemmAlgo.SimtDP4A, None),
*gen_shuffle_params( # *gen_shuffle_params(
(128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], # (128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.SimtDP4A, None), # 2, kernel.GemmAlgo.SimtDP4A, None),
*gen_shuffle_params( # *gen_shuffle_params(
(128, 128, 32), # (128, 128, 32),
(32, 64, 32), ["s8,s8,s32,s32,s32"], 2, # (32, 64, 32), ["s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.SimtDP4A, None), # kernel.GemmAlgo.SimtDP4A, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (128, 128, 32),
# (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2, # (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2,
...@@ -49,156 +49,156 @@ SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [ ...@@ -49,156 +49,156 @@ SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [
*gen_shuffle_params( *gen_shuffle_params(
(64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"], (64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.SimtDP4A, None), 2, kernel.GemmAlgo.SimtDP4A, None),
*gen_shuffle_params(
(64, 256, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 256, 8), # (64, 256, 8),
# (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params( # # *gen_shuffle_params(
(32, 128, 16), # # (64, 256, 8),
(32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params( # *gen_shuffle_params(
(32, 512, 8), # (32, 128, 16),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 512, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (128, 128, 8),
# # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 8), # (128, 128, 8),
# (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(128, 128, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 128, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 128, 8), # (64, 128, 8),
# (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (64, 128, 8),
# # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (128, 64, 8),
# # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 64, 8), # (128, 64, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(128, 64, 8),
(64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 64, 8),
(32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 64, 16),
(32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 32, 16),
(32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 32, 32),
(32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# fall back kernels if mat is misaligned for half
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 8), # (64, 64, 8),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 64, 16),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (64, 32, 16),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 32, 32),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # fall back kernels if mat is misaligned for half
# # *gen_shuffle_params(
# # (128, 128, 8),
# # (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 64, 32),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 32, 32),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (64, 64, 16),
# # (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (64, 128, 16),
# (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 64, 32),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 32, 32),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 64, 16), # (64, 64, 8),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 128, 16),
(32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 64, 8),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
] ]
SHUFFLE_VOLTA_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_VOLTA_PARAMS: List[GemmAlgoParams] = [
*gen_shuffle_params(
(64, 64, 32),
(32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (64, 64, 32),
# (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# # *gen_shuffle_params(
# # (128, 128, 32),
# # (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params(
# (128, 256, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params( # *gen_shuffle_params(
(128, 256, 32), # (256, 128, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params( # *gen_shuffle_params(
(256, 128, 32), # (128, 64, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params( # *gen_shuffle_params(
(128, 64, 32), # (64, 128, 32),
(64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params(
(64, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
] ]
# SHUFFLE_VOLTA_PARAMS = [] # SHUFFLE_VOLTA_PARAMS = []
SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [
*gen_shuffle_params( # *gen_shuffle_params(
(64, 64, 32), # (64, 64, 32),
(32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (128, 128, 32),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# # *gen_shuffle_params(
# # (128, 128, 32),
# # (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (64, 64, 64),
# (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (64, 128, 64),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (128, 256, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (256, 128, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (128, 64, 32),
# (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params( # *gen_shuffle_params(
(64, 64, 64), # (64, 128, 32),
(32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params( # *gen_shuffle_params(
(64, 128, 64), # (64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"],
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 256, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(256, 128, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 64, 32),
(64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(64, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params(
(128, 128, 32),
(32, 64, 32), ["s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (128, 128, 32),
# (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2, # (32, 64, 32), ["s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params( # # *gen_shuffle_params(
(128, 256, 32), # # (128, 128, 32),
(64, 64, 32), ["s8,s8,s32,s32,s32"], 2, # # (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params( # *gen_shuffle_params(
(256, 128, 32), # (128, 256, 32),
(64, 64, 32), ["s8,s8,s32,s32,s32"], 2, # (64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params( # *gen_shuffle_params(
(128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], # (256, 128, 32),
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # (64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
*gen_shuffle_params( # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
(64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], # *gen_shuffle_params(
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # (128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"],
# 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params(
# (64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"],
# 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
] ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment