update fused_moe.py

8a7c4451 · zhuwenwen · 37ef8dd3 · 8a7c4451
Commit 8a7c4451 authored Mar 15, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 3 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +2 -3

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -915,9 +915,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
        assert A_scale is None
        assert B_scale is None
-    if use_int4_w4a16:
+    EM = sorted_token_ids.shape[0]
-        EM = sorted_token_ids.shape[0]
+    if A.shape[0] < config["BLOCK_SIZE_M"]:
-    elif A.shape[0] < config["BLOCK_SIZE_M"]:
        # optimize for small batch_size.
        # We assume that the top_ids of each token are unique,
        # so num_valid_experts <= batch_size <= BLOCK_SIZE_M,