增加blockint8支持优化

5f15bdb5 · gaoqiong · f3deca99 · 5f15bdb5
Commit 5f15bdb5 authored Mar 26, 2025 by gaoqiong
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 6 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py +6 -6

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1734,14 +1734,14 @@ def fused_experts_impl(hidden_states: torch.Tensor,
        torch.ops._C.silu_and_mul(intermediate_cache2,
                                  intermediate_cache1.view(-1, N))
        if use_int8_w8a8:
-            m1=intermediate_cache2.shape[0]
-            if m1<=16:
-                config =stage2_best_config[m1-1]
-            elif m1<=32:
+            m=curr_hidden_states.shape[0]
+            if m<=16:
+                config =stage2_best_config[m-1]
+            elif m<=32:
                config =stage2_best_config[15]
-            elif m1<=64:
+            elif m<=64:
                config =stage2_best_config[16]
-            elif m1<256:
+            elif m<256:
                config ={
                        "BLOCK_SIZE_M": 16,
                        "BLOCK_SIZE_N": 32,