Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a5a623d9
Unverified
Commit
a5a623d9
authored
Apr 03, 2026
by
yzong-rh
Committed by
GitHub
Apr 04, 2026
Browse files
[Bugfix] Re-enable Renormalize routing for TRT-LLM MoE experts (#38859)
Signed-off-by:
Yifan Zong
<
yzong@redhat.com
>
parent
f8c3af2d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
12 deletions
+7
-12
vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
...odel_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
+2
-5
vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
...model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+5
-7
No files found.
vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
View file @
a5a623d9
...
@@ -79,11 +79,8 @@ class TrtLlmBf16Experts(mk.FusedMoEExpertsMonolithic):
...
@@ -79,11 +79,8 @@ class TrtLlmBf16Experts(mk.FusedMoEExpertsMonolithic):
RoutingMethodType
.
Default
,
RoutingMethodType
.
Default
,
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
Llama4
,
RoutingMethodType
.
Llama4
,
# NOTE: TRTLLM Kernel has issue with Qwen3.5 router.
RoutingMethodType
.
Renormalize
,
# Re-enable once the issue is resolved.
RoutingMethodType
.
RenormalizeNaive
,
# https://github.com/vllm-project/vllm/issues/37591
# RoutingMethodType.Renormalize,
# RoutingMethodType.RenormalizeNaive
]
]
@
staticmethod
@
staticmethod
...
...
vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
View file @
a5a623d9
...
@@ -277,13 +277,7 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
...
@@ -277,13 +277,7 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
weight_key
:
QuantKey
|
None
,
weight_key
:
QuantKey
|
None
,
activation_key
:
QuantKey
|
None
,
activation_key
:
QuantKey
|
None
,
)
->
bool
:
)
->
bool
:
"""Monolithic kernels need to express router support.
"""Monolithic kernels need to express router support."""
Renormalize/RenormalizeNaive are excluded: the monolithic kernel's
internal routing for these methods produces output uncorrelated
with the modular kernel's output and with Triton kernel's output
for Qwen3.5-35B-A3B-FP8.
See: https://github.com/vllm-project/vllm/issues/37591
"""
# NOTE(dbari): TopK routing could also be enabled, but need to validate models
# NOTE(dbari): TopK routing could also be enabled, but need to validate models
# NOTE(dbari): Default is not implemented and should not be enabled until it is
# NOTE(dbari): Default is not implemented and should not be enabled until it is
...
@@ -295,6 +289,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
...
@@ -295,6 +289,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
return
routing_method
in
[
return
routing_method
in
[
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
Simulated
,
RoutingMethodType
.
Simulated
,
RoutingMethodType
.
Renormalize
,
RoutingMethodType
.
RenormalizeNaive
,
]
]
elif
(
weight_key
,
activation_key
)
==
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
):
elif
(
weight_key
,
activation_key
)
==
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
):
# NOTE(dbari): as above, potentially allow others here.
# NOTE(dbari): as above, potentially allow others here.
...
@@ -302,6 +298,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
...
@@ -302,6 +298,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
DeepSeekV3
,
RoutingMethodType
.
Llama4
,
RoutingMethodType
.
Llama4
,
RoutingMethodType
.
Simulated
,
RoutingMethodType
.
Simulated
,
RoutingMethodType
.
Renormalize
,
RoutingMethodType
.
RenormalizeNaive
,
]
]
else
:
else
:
raise
ValueError
(
"Unsupported quantization scheme."
)
raise
ValueError
(
"Unsupported quantization scheme."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment