Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d2b0e97e
Unverified
Commit
d2b0e97e
authored
Aug 14, 2025
by
Michael Goin
Committed by
GitHub
Aug 14, 2025
Browse files
[CI Perf] Prune tests in `tests/kernels/moe/` (#22939)
Signed-off-by: mgoin <mgoin64@gmail.com>
parent
590bddbf
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
46 additions
and
31 deletions
+46
-31
tests/kernels/moe/test_batched_moe.py
tests/kernels/moe/test_batched_moe.py
+5
-8
tests/kernels/moe/test_count_expert_num_tokens.py
tests/kernels/moe/test_count_expert_num_tokens.py
+2
-3
tests/kernels/moe/test_moe.py
tests/kernels/moe/test_moe.py
+23
-10
tests/kernels/moe/test_moe_align_block_size.py
tests/kernels/moe/test_moe_align_block_size.py
+3
-3
tests/kernels/moe/test_moe_permute_unpermute.py
tests/kernels/moe/test_moe_permute_unpermute.py
+4
-4
tests/kernels/moe/test_pplx_moe.py
tests/kernels/moe/test_pplx_moe.py
+9
-3
No files found.
tests/kernels/moe/test_batched_moe.py
View file @
d2b0e97e
...
...
@@ -89,14 +89,11 @@ class BatchedMMTensors:
return
BatchedMMTensors
(
A
,
B
,
C
,
num_expert_tokens
)
@
pytest
.
mark
.
parametrize
(
"num_experts"
,
[
8
,
16
,
32
])
@
pytest
.
mark
.
parametrize
(
"max_tokens_per_expert"
,
[
32
,
64
,
128
,
192
,
224
,
256
,
512
])
@
pytest
.
mark
.
parametrize
(
"K"
,
[
128
,
256
,
1024
])
@
pytest
.
mark
.
parametrize
(
"N"
,
[
128
,
256
,
1024
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float8_e4m3fn
,
torch
.
float32
,
torch
.
float16
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"num_experts"
,
[
8
,
32
])
@
pytest
.
mark
.
parametrize
(
"max_tokens_per_expert"
,
[
32
,
224
,
512
])
@
pytest
.
mark
.
parametrize
(
"K"
,
[
128
,
1024
])
@
pytest
.
mark
.
parametrize
(
"N"
,
[
128
,
1024
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float8_e4m3fn
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"block_shape"
,
[
None
,
[
128
,
128
]])
@
pytest
.
mark
.
parametrize
(
"per_act_token_quant"
,
[
False
,
True
])
def
test_batched_mm
(
num_experts
:
int
,
max_tokens_per_expert
:
int
,
K
:
int
,
...
...
tests/kernels/moe/test_count_expert_num_tokens.py
View file @
d2b0e97e
...
...
@@ -113,8 +113,7 @@ def do_test_compute_expert_num_tokens(num_tokens: int, num_topk: int,
rtol
=
0
)
@
pytest
.
mark
.
parametrize
(
"num_tokens"
,
[
1
,
4
,
8
,
11
,
19
,
128
,
127
,
405
,
1024
,
3333
,
6666
,
7317
])
@
pytest
.
mark
.
parametrize
(
"num_tokens"
,
[
1
,
4
,
8
,
11
,
127
,
128
,
3333
,
7317
])
@
pytest
.
mark
.
parametrize
(
"num_topk"
,
[
2
,
6
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_experts"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
[
1
,
2
,
4
])
...
...
@@ -126,7 +125,7 @@ def test_compute_expert_num_tokens(num_tokens: int, num_topk: int,
ep_size
,
topk_ids_dtype
)
@
pytest
.
mark
.
parametrize
(
"numel"
,
list
(
range
(
1
,
8192
,
11
)))
@
pytest
.
mark
.
parametrize
(
"numel"
,
list
(
range
(
1
,
8192
,
111
)))
@
pytest
.
mark
.
parametrize
(
"num_experts"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
[
2
])
@
pytest
.
mark
.
parametrize
(
"topk_ids_dtype"
,
[
torch
.
int64
])
...
...
tests/kernels/moe/test_moe.py
View file @
d2b0e97e
...
...
@@ -42,6 +42,24 @@ NUM_EXPERTS = [8, 64, 192]
EP_SIZE
=
[
1
,
4
]
TOP_KS
=
[
2
,
6
]
FUSED_MOE_MNK_FACTORS
=
[
(
1
,
128
,
128
),
(
1
,
2048
,
128
),
(
33
,
2048
,
128
),
(
222
,
1024
,
1024
),
(
32768
,
128
,
128
),
(
32768
,
2048
,
511
),
(
40000
,
1024
,
1024
),
]
FUSED_MOE_WN16_MNK_FACTORS
=
[
(
1
,
128
,
128
),
(
1
,
1024
,
1024
),
(
32
,
2048
,
128
),
(
32
,
1024
,
1024
),
(
222
,
2048
,
1024
),
]
vllm_config
=
VllmConfig
()
vllm_config
.
scheduler_config
.
max_num_seqs
=
128
vllm_config
.
scheduler_config
.
max_model_len
=
8192
...
...
@@ -116,13 +134,11 @@ def run_moe_test(
return
baseline_output
@
pytest
.
mark
.
parametrize
(
"m"
,
[
1
,
33
,
64
,
222
,
32768
,
40000
])
@
pytest
.
mark
.
parametrize
(
"n"
,
[
128
,
1024
,
2048
])
@
pytest
.
mark
.
parametrize
(
"k"
,
[
128
,
511
,
1024
])
@
pytest
.
mark
.
parametrize
(
"m,n,k"
,
FUSED_MOE_MNK_FACTORS
)
@
pytest
.
mark
.
parametrize
(
"e"
,
NUM_EXPERTS
)
@
pytest
.
mark
.
parametrize
(
"topk"
,
TOP_KS
)
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
EP_SIZE
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float16
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"padding"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"chunk_size"
,
[
8192
])
def
test_fused_moe
(
...
...
@@ -235,13 +251,11 @@ def test_fused_moe(
use_cudagraph
=
use_cudagraph
)
@
pytest
.
mark
.
parametrize
(
"m"
,
[
1
,
32
,
222
])
@
pytest
.
mark
.
parametrize
(
"n"
,
[
128
,
1024
,
2048
])
@
pytest
.
mark
.
parametrize
(
"k"
,
[
128
,
1024
])
@
pytest
.
mark
.
parametrize
(
"m,n,k"
,
FUSED_MOE_WN16_MNK_FACTORS
)
@
pytest
.
mark
.
parametrize
(
"e"
,
NUM_EXPERTS
)
@
pytest
.
mark
.
parametrize
(
"topk"
,
TOP_KS
)
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
EP_SIZE
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float16
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"group_size"
,
[
64
,
128
])
@
pytest
.
mark
.
parametrize
(
"has_zp"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"weight_bits"
,
[
4
,
8
])
...
...
@@ -352,8 +366,7 @@ def test_fused_moe_wn16(m: int, n: int, k: int, e: int, topk: int,
torch
.
testing
.
assert_close
(
triton_output
,
torch_output
,
atol
=
2e-2
,
rtol
=
0
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float32
,
torch
.
float16
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"padding"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"use_rocm_aiter"
,
[
True
,
False
]
if
current_platform
.
is_rocm
()
else
[
False
])
...
...
tests/kernels/moe/test_moe_align_block_size.py
View file @
d2b0e97e
...
...
@@ -15,10 +15,10 @@ from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
from
vllm.platforms
import
current_platform
from
vllm.utils
import
round_up
NUM_TOKENS
=
[
1
,
3
,
7
,
16
,
256
,
2256
,
4096
]
NUM_EXPERTS
=
[
32
,
160
,
256
,
257
,
512
]
NUM_TOKENS
=
[
1
,
3
,
256
,
2256
,
4096
]
NUM_EXPERTS
=
[
32
,
160
,
256
,
257
]
TOP_KS
=
[
1
,
2
,
16
,
32
]
BLOCK_SIZES
=
[
32
,
64
,
128
,
256
]
BLOCK_SIZES
=
[
32
,
128
]
current_platform
.
seed_everything
(
0
)
...
...
tests/kernels/moe/test_moe_permute_unpermute.py
View file @
d2b0e97e
...
...
@@ -18,7 +18,7 @@ from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
from
vllm.platforms
import
current_platform
NUM_EXPERTS
=
[
16
,
64
,
256
]
TOP_KS
=
[
2
,
4
,
6
,
8
]
TOP_KS
=
[
2
,
6
,
8
]
EP_SIZE
=
[
1
,
4
,
16
]
current_platform
.
seed_everything
(
0
)
...
...
@@ -177,11 +177,11 @@ def torch_unpermute(permuted_hidden_states: torch.Tensor,
return
output
@
pytest
.
mark
.
parametrize
(
"n_token"
,
[
1
,
33
,
64
,
222
,
1024
,
2048
,
3000
,
5000
])
@
pytest
.
mark
.
parametrize
(
"n_hidden"
,
[
2048
,
4096
,
7168
])
@
pytest
.
mark
.
parametrize
(
"n_token"
,
[
1
,
33
,
1024
,
5000
])
@
pytest
.
mark
.
parametrize
(
"n_hidden"
,
[
2048
,
7168
])
@
pytest
.
mark
.
parametrize
(
"n_expert"
,
NUM_EXPERTS
)
@
pytest
.
mark
.
parametrize
(
"topk"
,
TOP_KS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float16
,
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
bfloat16
])
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
EP_SIZE
)
@
pytest
.
mark
.
parametrize
(
"align_block_size"
,
[
None
,
128
])
def
test_moe_permute_unpermute
(
n_token
:
int
,
n_hidden
:
int
,
topk
:
int
,
...
...
tests/kernels/moe/test_pplx_moe.py
View file @
d2b0e97e
...
...
@@ -44,6 +44,14 @@ requires_pplx = pytest.mark.skipif(
reason
=
"Requires PPLX kernels"
,
)
BATCHED_MOE_MNK_FACTORS
=
[
(
1
,
128
,
128
),
(
33
,
2048
,
128
),
(
64
,
128
,
2048
),
(
222
,
128
,
128
),
(
222
,
2048
,
1024
),
]
PPLX_COMBOS
=
[
# TODO: figure out why this fails, seems to be test problem
#(1, 128, 128),
...
...
@@ -152,9 +160,7 @@ def torch_batched_moe(
return
torch_finalize
(
out
,
topk_weight
,
topk_ids
)
@
pytest
.
mark
.
parametrize
(
"m"
,
[
1
,
33
,
64
,
222
])
@
pytest
.
mark
.
parametrize
(
"n"
,
[
128
,
1024
,
2048
])
@
pytest
.
mark
.
parametrize
(
"k"
,
[
128
,
512
,
1024
])
@
pytest
.
mark
.
parametrize
(
"m,n,k"
,
BATCHED_MOE_MNK_FACTORS
)
@
pytest
.
mark
.
parametrize
(
"e"
,
NUM_EXPERTS
)
@
pytest
.
mark
.
parametrize
(
"topk"
,
TOP_KS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
bfloat16
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment