Commit 707b4891 authored by wanglong3's avatar wanglong3
Browse files

Force VLLM_W8A8_BACKEND to 1 on the gfx928 architecture

parent 0bd5fcd2
......@@ -8,6 +8,7 @@ import os
import sys
import tempfile
import uuid
import torch
from collections.abc import Callable
from typing import TYPE_CHECKING, Any, Literal
......@@ -1896,7 +1897,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# cutlass: 2 (will be removed in the future)
# blaslt: 3 (default)
# rocblas: others
"VLLM_W8A8_BACKEND": lambda: int(os.getenv("VLLM_W8A8_BACKEND", "3")),
"VLLM_W8A8_BACKEND": lambda: int(
1 if "gfx928" in torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0] else os.getenv("VLLM_W8A8_BACKEND", "3")
),
# Capture MoE router logits for debugging/analysis.
"VLLM_MOE_ROUTER_CAPTURE":
lambda: (os.getenv("VLLM_MOE_ROUTER_CAPTURE", "0").lower() in ("true", "1")),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment