Commit b81573da authored by wangmin6's avatar wangmin6
Browse files

Merge branch 'wanglong3-v0.15.1-dev-patch-50277' into 'v0.15.1-dev'

On the gfx928 architecture, force VLLM_W8A8_BACKEND to be set to 1

See merge request dcutoolkit/deeplearing/vllm!533
parents 714c12da 707b4891
...@@ -8,6 +8,7 @@ import os ...@@ -8,6 +8,7 @@ import os
import sys import sys
import tempfile import tempfile
import uuid import uuid
import torch
from collections.abc import Callable from collections.abc import Callable
from typing import TYPE_CHECKING, Any, Literal from typing import TYPE_CHECKING, Any, Literal
...@@ -1897,7 +1898,9 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1897,7 +1898,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# cutlass: 2 (will remove in the future) # cutlass: 2 (will remove in the future)
# blaslt: 3 (default) # blaslt: 3 (default)
# rocblas: others # rocblas: others
"VLLM_W8A8_BACKEND": lambda: int(os.getenv("VLLM_W8A8_BACKEND", "3")), "VLLM_W8A8_BACKEND": lambda: int(
1 if "gfx928" in torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0] else os.getenv("VLLM_W8A8_BACKEND", "3")
),
# Capture MoE router logits for debugging/analysis. # Capture MoE router logits for debugging/analysis.
"VLLM_MOE_ROUTER_CAPTURE": "VLLM_MOE_ROUTER_CAPTURE":
lambda: (os.getenv("VLLM_MOE_ROUTER_CAPTURE", "0").lower() in ("true", "1")), lambda: (os.getenv("VLLM_MOE_ROUTER_CAPTURE", "0").lower() in ("true", "1")),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment