[fix]删掉错误添加代码

cff5452a · 王敏 · 8db76782 · cff5452a
Commit cff5452a authored Jun 17, 2025 by 王敏
Hide whitespace changes
Inline Side-by-side

Showing with 24 additions and 30 deletions

tests/kernels/moe/test_moe.py tests/kernels/moe/test_moe.py +24 -30

No files found.
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -13,7 +13,6 @@ from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 import vllm.model_executor.layers.fused_moe  # noqa
 from tests.kernels.utils import (opcheck, stack_and_dev, torch_moe,
                                 torch_moe_single)
-from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.model_executor.layers.fused_moe import fused_moe
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.moe_torch_iterative import (
@@ -30,9 +29,6 @@ NUM_EXPERTS = [8, 64]
 EP_SIZE = [1, 4]
 TOP_KS = [2, 6]

-vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192

 @pytest.mark.parametrize("m", [1, 33, 64, 222, 1024 * 128])
 @pytest.mark.parametrize("n", [128, 1024, 2048])
@@ -71,33 +67,31 @@ def test_fused_moe(
     else:
         e_map = None

-    with set_current_vllm_config(vllm_config):
-        torch_output = torch_moe(a, w1, w2, score, topk, e_map)
-        iterative_output = iterative_moe(a,
-                                         w1,
-                                         w2,
-                                         score,
-                                         topk,
-                                         global_num_experts=e,
-                                         expert_map=e_map,
-                                         renormalize=False)
-
-        # Pad the weight if moe padding is enabled
-        if padding:
-            w1 = F.pad(w1, (0, 128), "constant", 0)[..., 0:-128]
-            torch.cuda.empty_cache()
-            w2 = F.pad(w2, (0, 128), "constant", 0)[..., 0:-128]
-            torch.cuda.empty_cache()
-
-        triton_output = fused_moe(a,
-                                  w1,
-                                  w2,
-                                  score,
-                                  topk,
-                                  global_num_experts=e,
-                                  expert_map=e_map,
-                                  renormalize=False)
+    torch_output = torch_moe(a, w1, w2, score, topk, e_map)
+    iterative_output = iterative_moe(a,
+                                     w1,
+                                     w2,
+                                     score,
+                                     topk,
+                                     global_num_experts=e,
+                                     expert_map=e_map,
+                                     renormalize=False)

+    # Pad the weight if moe padding is enabled
+    if padding:
+        w1 = F.pad(w1, (0, 128), "constant", 0)[..., 0:-128]
+        torch.cuda.empty_cache()
+        w2 = F.pad(w2, (0, 128), "constant", 0)[..., 0:-128]
+        torch.cuda.empty_cache()
+
+    triton_output = fused_moe(a,
+                              w1,
+                              w2,
+                              score,
+                              topk,
+                              global_num_experts=e,
+                              expert_map=e_map,
+                              renormalize=False)
     torch.testing.assert_close(triton_output, torch_output, atol=2e-2, rtol=0)
     torch.testing.assert_close(iterative_output,
                                torch_output,