Merge branch 'wanglong3-v0.15.1-dev-patch-50277' into 'v0.15.1-dev'

Force VLLM_W8A8_BACKEND to 1 on the gfx928 architecture. See merge request dcutoolkit/deeplearing/vllm!533

Merge branch 'wanglong3-v0.15.1-dev-patch-50277' into 'v0.15.1-dev'
Force VLLM_W8A8_BACKEND to 1 on the gfx928 architecture. See merge request dcutoolkit/deeplearing/vllm!533
b81573da · wangmin6 · 714c12da · 707b4891 · b81573da
Commit b81573da authored Mar 26, 2026 by wangmin6
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 1 deletion

vllm/envs.py vllm/envs.py +4 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -8,6 +8,7 @@ import os
 import sys
 import tempfile
 import uuid
+import torch
 from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Literal
@@ -1897,7 +1898,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # cutlass: 2 (will remove in the future)
    # blaslt: 3 (default)
    # rocblas: others
-    "VLLM_W8A8_BACKEND": lambda: int(os.getenv("VLLM_W8A8_BACKEND", "3")),
+    "VLLM_W8A8_BACKEND": lambda: int(
+            1 if "gfx928" in torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0] else os.getenv("VLLM_W8A8_BACKEND", "3")
+    ),
    # Capture MoE router logits for debugging/analysis.
    "VLLM_MOE_ROUTER_CAPTURE":
    lambda: (os.getenv("VLLM_MOE_ROUTER_CAPTURE", "0").lower() in ("true", "1")),