Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bfb9bdaf
Unverified
Commit
bfb9bdaf
authored
Jan 29, 2026
by
Michael Goin
Committed by
GitHub
Jan 29, 2026
Browse files
[Bugfix] Enable Triton MoE for FP8 per-tensor dynamic (#33300)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
2284461d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
0 deletions
+3
-0
vllm/model_executor/layers/fused_moe/fused_batched_moe.py
vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+1
-0
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+2
-0
No files found.
vllm/model_executor/layers/fused_moe/fused_batched_moe.py
View file @
bfb9bdaf
...
@@ -933,6 +933,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
...
@@ -933,6 +933,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
SUPPORTED_W_A_FP8
=
[
SUPPORTED_W_A_FP8
=
[
(
kFp8Static128BlockSym
,
kFp8Dynamic128Sym
),
(
kFp8Static128BlockSym
,
kFp8Dynamic128Sym
),
(
kFp8StaticChannelSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticChannelSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
),
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTensorSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTensorSym
),
]
]
...
...
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
bfb9bdaf
...
@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc
...
@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc
from
vllm.model_executor.layers.quantization.utils.quant_utils
import
(
from
vllm.model_executor.layers.quantization.utils.quant_utils
import
(
QuantKey
,
QuantKey
,
kFp8Dynamic128Sym
,
kFp8Dynamic128Sym
,
kFp8DynamicTensorSym
,
kFp8DynamicTokenSym
,
kFp8DynamicTokenSym
,
kFp8Static128BlockSym
,
kFp8Static128BlockSym
,
kFp8StaticChannelSym
,
kFp8StaticChannelSym
,
...
@@ -1932,6 +1933,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
...
@@ -1932,6 +1933,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
(
kFp8StaticChannelSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticChannelSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTokenSym
),
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
),
(
kFp8StaticTensorSym
,
kFp8StaticTensorSym
),
(
kFp8StaticTensorSym
,
kFp8DynamicTensorSym
),
]
]
return
(
weight_key
,
activation_key
)
in
SUPPORTED_W_A
return
(
weight_key
,
activation_key
)
in
SUPPORTED_W_A
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment