Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c6df05eb
Unverified
Commit
c6df05eb
authored Dec 08, 2025 by Zhiwei
Committed by GitHub Dec 08, 2025
Browse files
[ROCm] [Fused Moe EP] Use binary expert mask for aiter fused moe kernel (#29773)
Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
parent
d726a7b0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 5 additions and 0 deletions
+5
-0
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+4
-0
vllm/model_executor/layers/quantization/quark/quark_moe.py
vllm/model_executor/layers/quantization/quark/quark_moe.py
+1
-0
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
c6df05eb
...
...
@@ -520,6 +520,10 @@ class FusedMoE(CustomOp):
self
.
_init_aiter_shared_experts_topK_buffer
(
vllm_config
=
vllm_config
,
dp_size
=
dp_size_
)
if
self
.
use_ep
and
self
.
rocm_aiter_fmoe_enabled
:
assert
self
.
expert_mask
is
None
or
torch
.
all
(
(
expert_mask
==
0
)
|
(
expert_mask
==
1
)
),
"Aiter Fused MoE kernel only supports expert_map with 0 and 1s."
assert
intermediate_size
%
self
.
tp_size
==
0
self
.
hidden_size
=
hidden_size
...
...
vllm/model_executor/layers/quantization/quark/quark_moe.py
View file @
c6df05eb
...
...
@@ -633,6 +633,7 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod):
topk_ids
=
topk_ids
,
activation
=
activation
,
quant_config
=
self
.
moe_quant_config
,
expert_map
=
expert_map
,
)
else
:
from
vllm.model_executor.layers.fused_moe
import
fused_experts
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment