[Kernel] changing fused moe kernel chunk size default to 32k (#7995)

34a0e96d · Avshalom Manevich · GitHub · 80c7b089 · 34a0e96d
Unverified Commit 34a0e96d authored Aug 30, 2024 by Avshalom Manevich Committed by GitHub Aug 30, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

vllm/envs.py +1 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -352,7 +352,7 @@ environment_variables: Dict[str, Callable[[], Any]] = {
            os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
        )),
    "VLLM_FUSED_MOE_CHUNK_SIZE":
-    lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")),
+    lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "32768")),
    # If set, vllm will skip the deprecation warnings.
    "VLLM_NO_DEPRECATION_WARNING":