Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9b876889
Unverified
Commit
9b876889
authored
Sep 16, 2025
by
Qi Yuhang
Committed by
GitHub
Sep 16, 2025
Browse files
Update CUTLASS. Refine KernelSchedule for fp8 (grouped) gemm. (#10491)
parent
c0c6f543
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
5 deletions
+5
-5
sgl-kernel/CMakeLists.txt
sgl-kernel/CMakeLists.txt
+1
-1
sgl-kernel/csrc/cutlass_extensions/gemm/fp8_blockwise_gemm_sm90_dispatch.cuh
...lass_extensions/gemm/fp8_blockwise_gemm_sm90_dispatch.cuh
+1
-1
sgl-kernel/csrc/moe/fp8_blockwise_moe_kernel.cu
sgl-kernel/csrc/moe/fp8_blockwise_moe_kernel.cu
+3
-3
No files found.
sgl-kernel/CMakeLists.txt
View file @
9b876889
...
@@ -46,7 +46,7 @@ include(FetchContent)
...
@@ -46,7 +46,7 @@ include(FetchContent)
FetchContent_Declare
(
FetchContent_Declare
(
repo-cutlass
repo-cutlass
GIT_REPOSITORY https://github.com/NVIDIA/cutlass
GIT_REPOSITORY https://github.com/NVIDIA/cutlass
GIT_TAG a49a78ffefc86a87160dfe0ccc3a3a2d1622c918
GIT_TAG 57e3cfb47a2d9e0d46eb6335c3dc411498efa198
GIT_SHALLOW OFF
GIT_SHALLOW OFF
)
)
FetchContent_Populate
(
repo-cutlass
)
FetchContent_Populate
(
repo-cutlass
)
...
...
sgl-kernel/csrc/cutlass_extensions/gemm/fp8_blockwise_gemm_sm90_dispatch.cuh
View file @
9b876889
...
@@ -72,7 +72,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
...
@@ -72,7 +72,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
using
EpilogueTileType
=
cutlass
::
epilogue
::
collective
::
EpilogueTileAuto
;
using
EpilogueTileType
=
cutlass
::
epilogue
::
collective
::
EpilogueTileAuto
;
using
StoreEpilogueCompute
=
typename
cutlass
::
epilogue
::
fusion
::
Sm90EVT
<
cutlass
::
epilogue
::
fusion
::
Sm90AccFetch
>
;
using
StoreEpilogueCompute
=
typename
cutlass
::
epilogue
::
fusion
::
Sm90EVT
<
cutlass
::
epilogue
::
fusion
::
Sm90AccFetch
>
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelTmaWarpSpecializedCooperativeFP8BlockScaledAccum
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelTmaWarpSpecializedCooperativeFP8Blockwise
;
using
CollectiveEpilogue
=
typename
cutlass
::
epilogue
::
collective
::
CollectiveBuilder
<
using
CollectiveEpilogue
=
typename
cutlass
::
epilogue
::
collective
::
CollectiveBuilder
<
ArchTag
,
ArchTag
,
OperatorClass
,
OperatorClass
,
...
...
sgl-kernel/csrc/moe/fp8_blockwise_moe_kernel.cu
View file @
9b876889
...
@@ -463,7 +463,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
...
@@ -463,7 +463,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
using
MmaTileShape
=
Shape
<
_128
,
_32
,
_128
>
;
using
MmaTileShape
=
Shape
<
_128
,
_32
,
_128
>
;
using
ClusterShape
=
Shape
<
_2
,
_1
,
_1
>
;
using
ClusterShape
=
Shape
<
_2
,
_1
,
_1
>
;
// TODO: Check Pingpong or Cooperative
// TODO: Check Pingpong or Cooperative
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedPingpongFP8BlockScaledAccum
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedPingpongFP8Blockwise
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedPingpong
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedPingpong
;
using
ScaleConfig
=
using
ScaleConfig
=
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
128
,
1
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
128
,
1
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
...
@@ -475,7 +475,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
...
@@ -475,7 +475,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
using
ElementA
=
cutlass
::
float_e4m3_t
;
using
ElementA
=
cutlass
::
float_e4m3_t
;
using
MmaTileShape
=
Shape
<
_64
,
_128
,
_128
>
;
using
MmaTileShape
=
Shape
<
_64
,
_128
,
_128
>
;
using
ClusterShape
=
Shape
<
_2
,
_1
,
_1
>
;
using
ClusterShape
=
Shape
<
_2
,
_1
,
_1
>
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedPingpongFP8BlockScaledAccum
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedPingpongFP8Blockwise
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedPingpong
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedPingpong
;
using
ScaleConfig
=
using
ScaleConfig
=
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
1
,
128
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
1
,
128
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
...
@@ -487,7 +487,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
...
@@ -487,7 +487,7 @@ void sm90_fp8_blockwise_group_mm_dispatch_shape(
using
ElementA
=
cutlass
::
float_e4m3_t
;
using
ElementA
=
cutlass
::
float_e4m3_t
;
using
MmaTileShape
=
Shape
<
_128
,
_128
,
_128
>
;
using
MmaTileShape
=
Shape
<
_128
,
_128
,
_128
>
;
using
ClusterShape
=
Shape
<
_1
,
_2
,
_1
>
;
using
ClusterShape
=
Shape
<
_1
,
_2
,
_1
>
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedCooperativeFP8BlockScaledAccum
;
using
KernelSchedule
=
cutlass
::
gemm
::
KernelPtrArrayTmaWarpSpecializedCooperativeFP8Blockwise
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedCooperative
;
using
EpilogueSchedule
=
cutlass
::
epilogue
::
PtrArrayTmaWarpSpecializedCooperative
;
using
ScaleConfig
=
using
ScaleConfig
=
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
1
,
128
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
cutlass
::
detail
::
Sm90BlockwiseScaleConfig
<
1
,
128
,
128
,
cute
::
GMMA
::
Major
::
K
,
cute
::
GMMA
::
Major
::
K
>
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment