Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2f7420bc
Unverified
Commit
2f7420bc
authored
Jun 01, 2025
by
Huapeng Zhou
Committed by
GitHub
Jun 01, 2025
Browse files
[Feat] Enable PDL automatically on Hopper architecture (#5981)
parent
c6a0cacc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
9 deletions
+28
-9
sgl-kernel/python/sgl_kernel/elementwise.py
sgl-kernel/python/sgl_kernel/elementwise.py
+21
-9
sgl-kernel/python/sgl_kernel/utils.py
sgl-kernel/python/sgl_kernel/utils.py
+7
-0
No files found.
sgl-kernel/python/sgl_kernel/elementwise.py
View file @
2f7420bc
from
typing
import
Optional
from
typing
import
Optional
import
torch
import
torch
from
sgl_kernel.utils
import
get_cuda_stream
from
sgl_kernel.utils
import
get_cuda_stream
,
is_hopper_arch
# These implementations extensively draw from and build upon the FlashInfer project https://github.com/flashinfer-ai/flashinfer
# These implementations extensively draw from and build upon the FlashInfer project https://github.com/flashinfer-ai/flashinfer
...
@@ -11,7 +11,7 @@ def rmsnorm(
...
@@ -11,7 +11,7 @@ def rmsnorm(
weight
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
eps
:
float
=
1e-6
,
eps
:
float
=
1e-6
,
out
:
Optional
[
torch
.
Tensor
]
=
None
,
out
:
Optional
[
torch
.
Tensor
]
=
None
,
enable_pdl
:
bool
=
Fals
e
,
enable_pdl
:
Optional
[
bool
]
=
Non
e
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
r
"""Root mean square normalization.
r
"""Root mean square normalization.
...
@@ -27,9 +27,10 @@ def rmsnorm(
...
@@ -27,9 +27,10 @@ def rmsnorm(
Epsilon for numerical stability.
Epsilon for numerical stability.
out: Optional[torch.Tensor]
out: Optional[torch.Tensor]
The output tensor, if specified, the kernel will update this tensor inplace.
The output tensor, if specified, the kernel will update this tensor inplace.
enable_pdl: bool
enable_pdl:
Optional[
bool
]
Whether to enable `programmatic dependent launch
Whether to enable `programmatic dependent launch
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
If None, will be automatically enabled on Hopper architecture.
Returns
Returns
-------
-------
...
@@ -38,6 +39,8 @@ def rmsnorm(
...
@@ -38,6 +39,8 @@ def rmsnorm(
"""
"""
if
out
is
None
:
if
out
is
None
:
out
=
torch
.
empty_like
(
input
)
out
=
torch
.
empty_like
(
input
)
if
enable_pdl
is
None
:
enable_pdl
=
is_hopper_arch
()
torch
.
ops
.
sgl_kernel
.
rmsnorm
.
default
(
out
,
input
,
weight
,
eps
,
enable_pdl
)
torch
.
ops
.
sgl_kernel
.
rmsnorm
.
default
(
out
,
input
,
weight
,
eps
,
enable_pdl
)
return
out
return
out
...
@@ -47,7 +50,7 @@ def fused_add_rmsnorm(
...
@@ -47,7 +50,7 @@ def fused_add_rmsnorm(
residual
:
torch
.
Tensor
,
residual
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
eps
:
float
=
1e-6
,
eps
:
float
=
1e-6
,
enable_pdl
:
bool
=
Fals
e
,
enable_pdl
:
Optional
[
bool
]
=
Non
e
,
)
->
None
:
)
->
None
:
r
"""Fused add root mean square normalization.
r
"""Fused add root mean square normalization.
...
@@ -67,10 +70,13 @@ def fused_add_rmsnorm(
...
@@ -67,10 +70,13 @@ def fused_add_rmsnorm(
Weight tensor, shape (hidden_size,).
Weight tensor, shape (hidden_size,).
eps: float
eps: float
Epsilon for numerical stability.
Epsilon for numerical stability.
enable_pdl: bool
enable_pdl:
Optional[
bool
]
Whether to enable `programmatic dependent launch
Whether to enable `programmatic dependent launch
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
If None, will be automatically enabled on Hopper architecture.
"""
"""
if
enable_pdl
is
None
:
enable_pdl
=
is_hopper_arch
()
torch
.
ops
.
sgl_kernel
.
fused_add_rmsnorm
.
default
(
torch
.
ops
.
sgl_kernel
.
fused_add_rmsnorm
.
default
(
input
,
residual
,
weight
,
eps
,
enable_pdl
input
,
residual
,
weight
,
eps
,
enable_pdl
)
)
...
@@ -81,7 +87,7 @@ def gemma_rmsnorm(
...
@@ -81,7 +87,7 @@ def gemma_rmsnorm(
weight
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
eps
:
float
=
1e-6
,
eps
:
float
=
1e-6
,
out
:
Optional
[
torch
.
Tensor
]
=
None
,
out
:
Optional
[
torch
.
Tensor
]
=
None
,
enable_pdl
:
bool
=
Fals
e
,
enable_pdl
:
Optional
[
bool
]
=
Non
e
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
r
"""Gemma-style root mean square normalization.
r
"""Gemma-style root mean square normalization.
...
@@ -97,9 +103,10 @@ def gemma_rmsnorm(
...
@@ -97,9 +103,10 @@ def gemma_rmsnorm(
Epsilon for numerical stability.
Epsilon for numerical stability.
out: Optional[torch.Tensor]
out: Optional[torch.Tensor]
The output tensor, if specified, the kernel will update this tensor inplace.
The output tensor, if specified, the kernel will update this tensor inplace.
enable_pdl: bool
enable_pdl:
Optional[
bool
]
Whether to enable `programmatic dependent launch
Whether to enable `programmatic dependent launch
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
If None, will be automatically enabled on Hopper architecture.
Returns
Returns
-------
-------
...
@@ -108,6 +115,8 @@ def gemma_rmsnorm(
...
@@ -108,6 +115,8 @@ def gemma_rmsnorm(
"""
"""
if
out
is
None
:
if
out
is
None
:
out
=
torch
.
empty_like
(
input
)
out
=
torch
.
empty_like
(
input
)
if
enable_pdl
is
None
:
enable_pdl
=
is_hopper_arch
()
torch
.
ops
.
sgl_kernel
.
gemma_rmsnorm
.
default
(
out
,
input
,
weight
,
eps
,
enable_pdl
)
torch
.
ops
.
sgl_kernel
.
gemma_rmsnorm
.
default
(
out
,
input
,
weight
,
eps
,
enable_pdl
)
return
out
return
out
...
@@ -117,7 +126,7 @@ def gemma_fused_add_rmsnorm(
...
@@ -117,7 +126,7 @@ def gemma_fused_add_rmsnorm(
residual
:
torch
.
Tensor
,
residual
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
eps
:
float
=
1e-6
,
eps
:
float
=
1e-6
,
enable_pdl
:
bool
=
Fals
e
,
enable_pdl
:
Optional
[
bool
]
=
Non
e
,
)
->
None
:
)
->
None
:
r
"""Gemma-style fused add root mean square normalization.
r
"""Gemma-style fused add root mean square normalization.
...
@@ -137,10 +146,13 @@ def gemma_fused_add_rmsnorm(
...
@@ -137,10 +146,13 @@ def gemma_fused_add_rmsnorm(
Weight tensor, shape (hidden_size,).
Weight tensor, shape (hidden_size,).
eps: float
eps: float
Epsilon for numerical stability.
Epsilon for numerical stability.
enable_pdl: bool
enable_pdl:
Optional[
bool
]
Whether to enable `programmatic dependent launch
Whether to enable `programmatic dependent launch
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
If None, will be automatically enabled on Hopper architecture.
"""
"""
if
enable_pdl
is
None
:
enable_pdl
=
is_hopper_arch
()
torch
.
ops
.
sgl_kernel
.
gemma_fused_add_rmsnorm
.
default
(
torch
.
ops
.
sgl_kernel
.
gemma_fused_add_rmsnorm
.
default
(
input
,
residual
,
weight
,
eps
,
enable_pdl
input
,
residual
,
weight
,
eps
,
enable_pdl
)
)
...
...
sgl-kernel/python/sgl_kernel/utils.py
View file @
2f7420bc
...
@@ -39,3 +39,10 @@ def _to_tensor_scalar_tuple(x):
...
@@ -39,3 +39,10 @@ def _to_tensor_scalar_tuple(x):
return
(
x
,
0
)
return
(
x
,
0
)
else
:
else
:
return
(
None
,
x
)
return
(
None
,
x
)
def is_hopper_arch() -> bool:
    """Return True when the current CUDA device is Hopper architecture.

    Hopper GPUs report compute capability major version 9 (e.g. 9.0 for
    H100).  Callers use this to decide whether to enable programmatic
    dependent launch (PDL) automatically when ``enable_pdl`` is ``None``.

    Returns:
        bool: True if the current CUDA device's compute capability major
        version equals 9, False otherwise.
    """
    device = torch.cuda.current_device()
    # Only the major version matters; the minor version is irrelevant here,
    # so index it out instead of unpacking an unused local.
    major = torch.cuda.get_device_capability(device)[0]
    return major == 9
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment