Commit 6fa116fb authored by zhuwenwen
Browse files

update VLLM_USE_TOPK_RENORM

parent fb35feea
......@@ -1683,7 +1683,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vLLM will use optimized topk_softmax + renormalize
"VLLM_USE_TOPK_RENORM":
lambda:
(os.environ.get("VLLM_USE_TOPK_RENORM", "False").lower() in
(os.environ.get("VLLM_USE_TOPK_RENORM", "True").lower() in
("true", "1")),
# vLLM will use fused RMS + RoPE kernel
"VLLM_USE_FUSED_RMS_ROPE":
......
......@@ -1363,7 +1363,7 @@ def vllm_topk_softmax(topk_weights: torch.Tensor, topk_indices: torch.Tensor,
token_expert_indices: torch.Tensor,
gating_output: torch.Tensor,
renormalize: bool) -> tuple[torch.Tensor, ...]:
if envs.VLLM_USE_TOPK_RENORM:
if envs.VLLM_USE_TOPK_RENORM and renormalize is True:
from lightop import op as op
op.topk_softmax(
topk_weights,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment