Commit 2093c9e7 authored by guizhh
Browse files

fix:恢复 topk 归一化默认值并强制 fused moe 归一化

parent fb35feea
...@@ -1683,7 +1683,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1683,7 +1683,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vLLM will use optimized topk_softmax + renormalize # vLLM will use optimized topk_softmax + renormalize
"VLLM_USE_TOPK_RENORM": "VLLM_USE_TOPK_RENORM":
lambda: lambda:
(os.environ.get("VLLM_USE_TOPK_RENORM", "False").lower() in (os.environ.get("VLLM_USE_TOPK_RENORM", "True").lower() in
("true", "1")), ("true", "1")),
# vLLM will use fused RMS + RoPE kernel # vLLM will use fused RMS + RoPE kernel
"VLLM_USE_FUSED_RMS_ROPE": "VLLM_USE_FUSED_RMS_ROPE":
......
...@@ -1370,7 +1370,7 @@ def vllm_topk_softmax(topk_weights: torch.Tensor, topk_indices: torch.Tensor, ...@@ -1370,7 +1370,7 @@ def vllm_topk_softmax(topk_weights: torch.Tensor, topk_indices: torch.Tensor,
topk_indices, topk_indices,
token_expert_indices, token_expert_indices,
gating_output, gating_output,
renormalize, True,
) )
else: else:
ops.topk_softmax( ops.topk_softmax(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment