fix: 恢复 topk 归一化默认值并强制 fused moe 归一化

2093c9e7 · guizhh · fb35feea · 2093c9e7 · 2093c9e7
Commit 2093c9e7 authored Jan 20, 2026 by guizhh
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/envs.py vllm/envs.py +1 -1

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +1 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -1683,7 +1683,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # vLLM will use optimized topk_softmax + renormalize
    "VLLM_USE_TOPK_RENORM":
        lambda:
-        (os.environ.get("VLLM_USE_TOPK_RENORM", "False").lower() in
+        (os.environ.get("VLLM_USE_TOPK_RENORM", "True").lower() in
                ("true", "1")),
    # vLLM will use fused RMS + RoPE kernel
    "VLLM_USE_FUSED_RMS_ROPE":

--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1370,7 +1370,7 @@ def vllm_topk_softmax(topk_weights: torch.Tensor, topk_indices: torch.Tensor,
            topk_indices,
            token_expert_indices,
            gating_output,
-            renormalize,
+            True,
        )
    else:
        ops.topk_softmax(