Unverified commit a4905133, authored by Hank_ and committed by GitHub
Browse files

[xpu][rocm] Update `current_platform.supports_fp8()` for TritonExperts (#40132)


Signed-off-by: Hank <hcc.mayday@gmail.com>
parent ecbe42e9
...@@ -1952,24 +1952,7 @@ class TritonExperts(mk.FusedMoEExpertsModular): ...@@ -1952,24 +1952,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
weight_key: QuantKey | None, weight_key: QuantKey | None,
activation_key: QuantKey | None, activation_key: QuantKey | None,
) -> bool: ) -> bool:
p = current_platform if not current_platform.supports_fp8():
if p.is_rocm():
from vllm.platforms.rocm import on_gfx9, on_gfx12x
is_rocm_on_gfx9 = on_gfx9()
is_rocm_on_gfx12x = on_gfx12x()
else:
is_rocm_on_gfx9 = False
is_rocm_on_gfx12x = False
device_supports_fp8 = (
is_rocm_on_gfx9
or is_rocm_on_gfx12x
or (p.is_cuda() and p.has_device_capability((8, 9)))
or p.is_xpu()
)
if not device_supports_fp8:
return (weight_key, activation_key) == (None, None) return (weight_key, activation_key) == (None, None)
SUPPORTED_W_A = [ SUPPORTED_W_A = [
......
...@@ -800,7 +800,7 @@ class RocmPlatform(Platform): ...@@ -800,7 +800,7 @@ class RocmPlatform(Platform):
@classmethod @classmethod
def supports_fp8(cls) -> bool: def supports_fp8(cls) -> bool:
return any(gfx in _GCN_ARCH for gfx in ["gfx94", "gfx95", "gfx12"]) return on_gfx9() or on_gfx12x()
@classmethod @classmethod
def is_fp8_fnuz(cls) -> bool: def is_fp8_fnuz(cls) -> bool:
......
...@@ -323,6 +323,10 @@ class XPUPlatform(Platform): ...@@ -323,6 +323,10 @@ class XPUPlatform(Platform):
) )
return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa
@classmethod
def supports_fp8(cls) -> bool:
return True
@classmethod @classmethod
def get_default_ir_op_priority( def get_default_ir_op_priority(
cls, vllm_config: "VllmConfig" cls, vllm_config: "VllmConfig"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment