Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f551bd1d
Commit
f551bd1d
authored
Feb 10, 2026
by
wanglong3
Browse files
Force the use of the Triton GEMM backend on the gfx928 architecture.
parent
a27f634a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
3 deletions
+6
-3
vllm/envs.py
vllm/envs.py
+4
-1
vllm/model_executor/layers/quantization/slimquant_w4a8.py
vllm/model_executor/layers/quantization/slimquant_w4a8.py
+2
-2
No files found.
vllm/envs.py
View file @
f551bd1d
...
...
@@ -6,6 +6,7 @@ import json
import
os
import
sys
import
tempfile
import
torch
from
typing
import
TYPE_CHECKING
,
Any
,
Callable
,
Literal
,
Optional
,
Union
if
TYPE_CHECKING
:
...
...
@@ -1704,7 +1705,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# cutlass: 2 (will be removed in the future)
# blaslt: 3 (default)
# rocblas: others
"VLLM_W8A8_BACKEND"
:
lambda
:
int
(
os
.
getenv
(
"VLLM_W8A8_BACKEND"
,
"3"
)),
"VLLM_W8A8_BACKEND"
:
lambda
:
int
(
1
if
"gfx928"
in
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
.
split
(
':'
)[
0
]
else
os
.
getenv
(
"VLLM_W8A8_BACKEND"
,
"3"
)),
# Force using Triton MoE path (disable Marlin W16A16 MoE).
"VLLM_USE_MOE_W16A16_TRITON"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_MOE_W16A16_TRITON"
,
"0"
).
lower
()
in
...
...
vllm/model_executor/layers/quantization/slimquant_w4a8.py
View file @
f551bd1d
...
...
@@ -92,8 +92,8 @@ class SlimQuantW4A8Int8LinearMethod(LinearMethodBase):
def
__init__
(
self
,
quantization_config
:
SlimQuantW4A8Int8Config
):
self
.
quantization_config
=
quantization_config
self
.
tritonsingleton
=
W8a8GetCacheJSON
()
self
.
w8a8_strategy
=
int
(
os
.
getenv
(
'W8A8_SUPPORT_METHODS'
,
'1'
))
self
.
tritonsingleton
=
W8a8GetCacheJSON
()
self
.
w8a8_strategy
=
envs
.
VLLM_W8A8_BACKEND
def
process_weights_after_loading
(
self
,
layer
:
torch
.
nn
.
Module
)
->
None
:
n
=
layer
.
weight
.
shape
[
0
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment