update fused_moe.py

cbeaf0b5 · zhuwenwen · 2b47d4fa · cbeaf0b5
Commit cbeaf0b5 authored Mar 17, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 3 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +1 -3

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -918,9 +918,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
        assert B_scale is None
    EM = sorted_token_ids.shape[0]
-    if use_int4_w4a16:
+    if A.shape[0] < config["BLOCK_SIZE_M"]:
-        EM = sorted_token_ids.shape[0]
-    elif A.shape[0] < config["BLOCK_SIZE_M"]:
        # optimize for small batch_size.
        # We assume that top_ids of each token is unique, so
        # num_valid_experts <= batch_size <= BLOCK_SIZE_M,