Unverified commit 76c6e6da, authored by Kunshang Ji, committed by GitHub
Browse files

[XPU] Support block fp8 moe by fallback to TritonExpert on XPU (#36458)


Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
parent 41846537
......@@ -1940,7 +1940,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
@staticmethod
def _supports_current_device() -> bool:
- return current_platform.is_cuda_alike()
+ return current_platform.is_cuda_alike() or current_platform.is_xpu()
@staticmethod
def _supports_no_act_and_mul() -> bool:
......@@ -1959,8 +1959,10 @@ class TritonExperts(mk.FusedMoEExpertsModular):
else:
is_rocm_on_gfx9 = False
- device_supports_fp8 = is_rocm_on_gfx9 or (
- p.is_cuda() and p.has_device_capability((8, 9))
+ device_supports_fp8 = (
+ is_rocm_on_gfx9
+ or (p.is_cuda() and p.has_device_capability((8, 9)))
+ or p.is_xpu()
)
if not device_supports_fp8:
......
......@@ -94,6 +94,11 @@ def _get_priority_backends(
else:
_move_to_front(_AVAILABLE_BACKENDS, Fp8MoeBackend.TRITON)
+ if current_platform.is_xpu():
+ # XPU platform supports TritonExperts and XPUExpertsFp8,
+ # move XPU backend to the front.
+ _move_to_front(_AVAILABLE_BACKENDS, Fp8MoeBackend.XPU)
return _AVAILABLE_BACKENDS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment