[xpu][rocm] Update `current_platform.supports_fp8()` for TritonExperts (#40132)

Signed-off-by: Hank <hcc.mayday@gmail.com>

[xpu][rocm] Update `current_platform.supports_fp8()` for TritonExperts (#40132)
Signed-off-by: Hank <hcc.mayday@gmail.com>
a4905133 · Hank_ · GitHub · ecbe42e9 · a4905133 · a4905133
Unverified commit a4905133, authored Apr 22, 2026 by Hank_; committed by GitHub on Apr 22, 2026.
Showing with 6 additions and 19 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py +1 -18

vllm/platforms/rocm.py +1 -1

vllm/platforms/xpu.py +4 -0

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1952,24 +1952,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
        weight_key: QuantKey | None,
        activation_key: QuantKey | None,
    ) -> bool:
-        p = current_platform
+        if not current_platform.supports_fp8():
-        if p.is_rocm():
-            from vllm.platforms.rocm import on_gfx9, on_gfx12x
-            is_rocm_on_gfx9 = on_gfx9()
-            is_rocm_on_gfx12x = on_gfx12x()
-        else:
-            is_rocm_on_gfx9 = False
-            is_rocm_on_gfx12x = False
-        device_supports_fp8 = (
-            is_rocm_on_gfx9
-            or is_rocm_on_gfx12x
-            or (p.is_cuda() and p.has_device_capability((8, 9)))
-            or p.is_xpu()
-        )
-        if not device_supports_fp8:
            return (weight_key, activation_key) == (None, None)
        SUPPORTED_W_A = [

--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -800,7 +800,7 @@ class RocmPlatform(Platform):
    @classmethod
    def supports_fp8(cls) -> bool:
-        return any(gfx in _GCN_ARCH for gfx in ["gfx94", "gfx95", "gfx12"])
+        return on_gfx9() or on_gfx12x()
    @classmethod
    def is_fp8_fnuz(cls) -> bool:

--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -323,6 +323,10 @@ class XPUPlatform(Platform):
            )
        return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
+    @classmethod
+    def supports_fp8(cls) -> bool:
+        return True
    @classmethod
    def get_default_ir_op_priority(
        cls, vllm_config: "VllmConfig"