Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
de2b7830
Unverified
Commit
de2b7830
authored
Nov 08, 2025
by
Yong Hoon Shin
Committed by
GitHub
Nov 08, 2025
Browse files
[ROCm] Add env to enable/disable aiter triton gemm (#28321)
Signed-off-by:
Yong Hoon Shin
<
yhshin@meta.com
>
parent
e5e9067e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
0 deletions
+8
-0
vllm/envs.py
vllm/envs.py
+7
-0
vllm/model_executor/layers/utils.py
vllm/model_executor/layers/utils.py
+1
-0
No files found.
vllm/envs.py
View file @
de2b7830
...
...
@@ -113,6 +113,7 @@ if TYPE_CHECKING:
VLLM_ROCM_USE_AITER_FP8BMM
:
bool
=
True
VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION
:
bool
=
False
VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS
:
bool
=
True
VLLM_ROCM_USE_AITER_TRITON_GEMM
:
bool
=
True
VLLM_ROCM_USE_SKINNY_GEMM
:
bool
=
True
VLLM_ROCM_FP8_PADDING
:
bool
=
True
VLLM_ROCM_MOE_PADDING
:
bool
=
True
...
...
@@ -944,6 +945,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
os
.
getenv
(
"VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS"
,
"True"
).
lower
()
in
(
"true"
,
"1"
)
),
# Whether to use aiter triton kernels for gemm ops.
# Enabled by default.
"VLLM_ROCM_USE_AITER_TRITON_GEMM"
:
lambda
:
(
os
.
getenv
(
"VLLM_ROCM_USE_AITER_TRITON_GEMM"
,
"True"
).
lower
()
in
(
"true"
,
"1"
)
),
# use rocm skinny gemms
"VLLM_ROCM_USE_SKINNY_GEMM"
:
lambda
:
(
os
.
getenv
(
"VLLM_ROCM_USE_SKINNY_GEMM"
,
"True"
).
lower
()
in
(
"true"
,
"1"
)
...
...
@@ -1586,6 +1592,7 @@ def compute_hash() -> str:
"VLLM_ROCM_USE_TRITON_ROPE"
,
"VLLM_ROCM_USE_AITER_FP8BMM"
,
"VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION"
,
"VLLM_ROCM_USE_AITER_TRITON_GEMM"
,
"VLLM_ROCM_USE_SKINNY_GEMM"
,
"VLLM_ROCM_FP8_PADDING"
,
"VLLM_ROCM_MOE_PADDING"
,
...
...
vllm/model_executor/layers/utils.py
View file @
de2b7830
...
...
@@ -106,6 +106,7 @@ def default_unquantized_gemm(
def
use_aiter_triton_gemm
(
n
,
m
,
k
,
dtype
):
if
(
envs
.
VLLM_ROCM_USE_AITER
==
0
or
envs
.
VLLM_ROCM_USE_AITER_TRITON_GEMM
==
0
# MI300-series GPUs: is_fp8_fnuz() is True
or
current_platform
.
is_fp8_fnuz
()
or
dtype
not
in
[
torch
.
float16
,
torch
.
bfloat16
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment