Optimize cutlass int8 gemm kernel for large M on SM89 Ada GPU (#10714)

0f04a5f4 · Qi Yuhang · GitHub · 2f18602f · 0f04a5f4
Unverified commit 0f04a5f4, authored Sep 22, 2025 by Qi Yuhang; committed by GitHub Sep 21, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

sgl-kernel/csrc/gemm/int8_gemm_kernel.cu +2 -2

No files found.
--- a/sgl-kernel/csrc/gemm/int8_gemm_kernel.cu
+++ b/sgl-kernel/csrc/gemm/int8_gemm_kernel.cu
@@ -409,8 +409,8 @@ void sm89_dispatch_shape(
    cutlass_int8_scaled_mm<
        ElementOutput,
        ArchTag,
-        cutlass::gemm::GemmShape<32, 64, 128>,
-        cutlass::gemm::GemmShape<16, 64, 64>,
+        cutlass::gemm::GemmShape<128, 128, 64>,
+        cutlass::gemm::GemmShape<64, 64, 64>,
        InstructionShape,
        5>(out, mat_a, mat_b, scales_a, scales_b, bias);
  }