update fused_moe.py

c588349c · zhuwenwen · 277271f8 · c588349c
Commit c588349c authored Apr 09, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +1 -1

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1539,7 +1539,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
                          device=hidden_states.device,
                          dtype=hidden_states.dtype)
    intermediate_cache1 = cache13[:M * top_k_num * N].view(M, top_k_num, N)
-    intermediate_cache3 = cache13[:M * top_k_num * (K if not use_nn_moe else w2.shape[2])].view(M, top_k_num, K)
+    intermediate_cache3 = cache13[:M * top_k_num * (K if not use_nn_moe else w2.shape[2])].view(M, top_k_num, K if not use_nn_moe else w2.shape[2])

    # This needs separate memory since it's used concurrently with cache1
    intermediate_cache2 = torch.empty((M * top_k_num, N // 2),