Commit 643dc095 authored by SAC_fanth
Browse files

[feature]w8a8量化模型默认不开启aiter

parent ca9ce18d
......@@ -167,7 +167,7 @@ if TYPE_CHECKING:
VLLM_MOE_USE_DEEP_GEMM: bool = True
VLLM_USE_DEEP_GEMM_E8M0: bool = True
VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES: bool = True
VLLM_USE_AITER_MOE_W8A8: bool = True
VLLM_USE_AITER_MOE_W8A8: bool = False
VLLM_DEEP_GEMM_WARMUP: Literal[
"skip",
"full",
......@@ -1292,7 +1292,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
int(os.getenv("VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES", "1"))
),
"VLLM_USE_AITER_MOE_W8A8": lambda: bool(
int(os.getenv("VLLM_USE_AITER_MOE_W8A8", "1"))
int(os.getenv("VLLM_USE_AITER_MOE_W8A8", "0"))
),
# DeepGemm JITs the kernels on-demand. The warmup attempts to make DeepGemm
# JIT all the required kernels before model execution so there is no
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment