Unverified commit bfb9bdaf, authored by Michael Goin, committed by GitHub
Browse files

[Bugfix] Enable Triton MoE for FP8 per-tensor dynamic (#33300)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 2284461d
...@@ -933,6 +933,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute): ...@@ -933,6 +933,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
SUPPORTED_W_A_FP8 = [ SUPPORTED_W_A_FP8 = [
(kFp8Static128BlockSym, kFp8Dynamic128Sym), (kFp8Static128BlockSym, kFp8Dynamic128Sym),
(kFp8StaticChannelSym, kFp8DynamicTokenSym), (kFp8StaticChannelSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8StaticTensorSym), (kFp8StaticTensorSym, kFp8StaticTensorSym),
(kFp8StaticTensorSym, kFp8DynamicTensorSym), (kFp8StaticTensorSym, kFp8DynamicTensorSym),
] ]
......
...@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc ...@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc
from vllm.model_executor.layers.quantization.utils.quant_utils import ( from vllm.model_executor.layers.quantization.utils.quant_utils import (
QuantKey, QuantKey,
kFp8Dynamic128Sym, kFp8Dynamic128Sym,
kFp8DynamicTensorSym,
kFp8DynamicTokenSym, kFp8DynamicTokenSym,
kFp8Static128BlockSym, kFp8Static128BlockSym,
kFp8StaticChannelSym, kFp8StaticChannelSym,
...@@ -1932,6 +1933,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute): ...@@ -1932,6 +1933,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
(kFp8StaticChannelSym, kFp8DynamicTokenSym), (kFp8StaticChannelSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8DynamicTokenSym), (kFp8StaticTensorSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8StaticTensorSym), (kFp8StaticTensorSym, kFp8StaticTensorSym),
(kFp8StaticTensorSym, kFp8DynamicTensorSym),
] ]
return (weight_key, activation_key) in SUPPORTED_W_A return (weight_key, activation_key) in SUPPORTED_W_A
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment