[feature] w8a8量化模型默认不开启aiter (W8A8 quantized models no longer enable AITER by default)

643dc095 · SAC_fanth · ca9ce18d · 643dc095
Commit 643dc095 authored May 06, 2026 by SAC_fanth
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/envs.py vllm/envs.py +2 -2

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -167,7 +167,7 @@ if TYPE_CHECKING:
    VLLM_MOE_USE_DEEP_GEMM: bool = True
    VLLM_USE_DEEP_GEMM_E8M0: bool = True
    VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES: bool = True
-    VLLM_USE_AITER_MOE_W8A8: bool = True
+    VLLM_USE_AITER_MOE_W8A8: bool = False
    VLLM_DEEP_GEMM_WARMUP: Literal[
        "skip",
        "full",
@@ -1292,7 +1292,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
        int(os.getenv("VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES", "1"))
    ),
    "VLLM_USE_AITER_MOE_W8A8": lambda: bool(
-        int(os.getenv("VLLM_USE_AITER_MOE_W8A8", "1"))
+        int(os.getenv("VLLM_USE_AITER_MOE_W8A8", "0"))
    ),
    # DeepGemm JITs the kernels on-demand. The warmup attempts to make DeepGemm
    # JIT all the required kernels before model execution so there is no