Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bc9aee38
Commit
bc9aee38
authored
Sep 08, 2025
by
zhuwenwen
Browse files
update op.moe_fused_gate
parent
a54ab95d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
35 additions
and
35 deletions
+35
-35
tests/kernels/mamba/test_mamba_ssm_ssd.py
tests/kernels/mamba/test_mamba_ssm_ssd.py
+25
-25
tests/kernels/quantization/untest_triton_scaled_mm.py
tests/kernels/quantization/untest_triton_scaled_mm.py
+0
-0
tests/spec_decode/e2e/untest_logprobs.py
tests/spec_decode/e2e/untest_logprobs.py
+0
-0
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+10
-10
No files found.
tests/kernels/mamba/test_mamba_ssm_ssd.py
View file @
bc9aee38
...
@@ -230,31 +230,31 @@ def test_mamba_chunk_scan_single_example(d_head, n_heads, seq_len_chunk_size,
...
@@ -230,31 +230,31 @@ def test_mamba_chunk_scan_single_example(d_head, n_heads, seq_len_chunk_size,
# @pytest.mark.parametrize("n_heads", [4, 8, 13])
# @pytest.mark.parametrize("n_heads", [4, 8, 13])
# @pytest.mark.parametrize("d_head", [5, 16, 21, 32])
# @pytest.mark.parametrize("d_head", [5, 16, 21, 32])
# @pytest.mark.parametrize(
# @pytest.mark.parametrize(
"seq_len_chunk_size_cases"
,
#
"seq_len_chunk_size_cases",
[
#
[
# small-ish chunk_size (8)
#
# small-ish chunk_size (8)
(
64
,
8
,
2
,
[(
64
,
32
),
(
64
,
32
)]),
#
(64, 8, 2, [(64, 32), (64, 32)]),
(
64
,
8
,
2
,
[(
32
,
32
),
(
32
,
32
),
(
32
,
32
)]),
#
(64, 8, 2, [(32, 32), (32, 32), (32, 32)]),
(
64
,
8
,
2
,
[(
8
,
8
),
(
8
,
8
),
(
8
,
8
)]),
# chunk size boundary
#
(64, 8, 2, [(8, 8), (8, 8), (8, 8)]), # chunk size boundary
(
64
,
8
,
2
,
[(
4
,
4
),
(
4
,
4
),
(
4
,
4
),
#
(64, 8, 2, [(4, 4), (4, 4), (4, 4),
(
4
,
4
)]),
# chunk_size larger than cont batches
#
(4, 4)]), # chunk_size larger than cont batches
(
64
,
8
,
5
,
[
#
(64, 8, 5, [
(
64
,
32
,
16
,
8
,
8
),
#
(64, 32, 16, 8, 8),
(
8
,
16
,
32
,
16
,
8
),
#
(8, 16, 32, 16, 8),
(
8
,
8
,
16
,
32
,
16
),
#
(8, 8, 16, 32, 16),
]),
# mode examples with varied lengths
#
]), # mode examples with varied lengths
# odd chunk_size
#
# odd chunk_size
(
64
,
29
,
2
,
[(
11
,
4
),
(
13
,
23
),
(
19
,
22
),
#
(64, 29, 2, [(11, 4), (13, 23), (19, 22),
(
21
,
15
)]),
# irregular sizes
#
(21, 15)]), # irregular sizes
# large-ish chunk_size (256)
#
# large-ish chunk_size (256)
(
64
,
256
,
1
,
[(
5
,
),
(
1
,
),
(
1
,
),
#
(64, 256, 1, [(5, ), (1, ), (1, ),
(
1
,
)]),
# irregular sizes with small sequences
#
(1, )]), # irregular sizes with small sequences
(
64
,
256
,
2
,
[(
5
,
30
),
(
1
,
2
),
(
1
,
2
),
#
(64, 256, 2, [(5, 30), (1, 2), (1, 2),
(
1
,
2
)]),
# irregular sizes with small sequences
#
(1, 2)]), # irregular sizes with small sequences
]
)
#
]
# def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases,
# def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases,
# itype):
# itype):
...
...
tests/kernels/quantization/test_triton_scaled_mm.py
→
tests/kernels/quantization/
un
test_triton_scaled_mm.py
View file @
bc9aee38
File moved
tests/spec_decode/e2e/test_logprobs.py
→
tests/spec_decode/e2e/
un
test_logprobs.py
View file @
bc9aee38
File moved
vllm/model_executor/layers/fused_moe/layer.py
View file @
bc9aee38
...
@@ -1285,19 +1285,19 @@ class FusedMoE(torch.nn.Module):
...
@@ -1285,19 +1285,19 @@ class FusedMoE(torch.nn.Module):
num_expert_group
,
num_expert_group
,
topk_group
,
topk_group
,
top_k
,
top_k
,
routed_scaling_factor
=
routed_scaling_factor
,
0
,
n_share_experts_fusion
=
0
,
routed_scaling_factor
,
)
)
else
:
else
:
topk_weights
,
topk_ids
=
ops
.
moe_fused_gate
(
topk_weights
,
topk_ids
=
ops
.
moe_fused_gate
(
router_logits
,
router_logits
,
e_score_correction_bias
,
e_score_correction_bias
,
num_expert_group
,
num_expert_group
,
topk_group
,
topk_group
,
top_k
,
top_k
,
routed_scaling_factor
=
routed_scaling_factor
,
routed_scaling_factor
=
routed_scaling_factor
,
n_share_experts_fusion
=
0
,
n_share_experts_fusion
=
0
,
)
)
else
:
else
:
topk_weights
,
topk_ids
=
grouped_topk
(
topk_weights
,
topk_ids
=
grouped_topk
(
hidden_states
=
hidden_states
,
hidden_states
=
hidden_states
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment