Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a4905133
Unverified
Commit
a4905133
authored
Apr 22, 2026
by
Hank_
Committed by
GitHub
Apr 22, 2026
Browse files
[xpu][rocm] Update `current_platform.supports_fp8()` for TritonExperts (#40132)
Signed-off-by:
Hank
<
hcc.mayday@gmail.com
>
parent
ecbe42e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
19 deletions
+6
-19
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+1
-18
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+1
-1
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+4
-0
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
a4905133
...
@@ -1952,24 +1952,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
...
@@ -1952,24 +1952,7 @@ class TritonExperts(mk.FusedMoEExpertsModular):
weight_key
:
QuantKey
|
None
,
weight_key
:
QuantKey
|
None
,
activation_key
:
QuantKey
|
None
,
activation_key
:
QuantKey
|
None
,
)
->
bool
:
)
->
bool
:
p
=
current_platform
if
not
current_platform
.
supports_fp8
():
if
p
.
is_rocm
():
from
vllm.platforms.rocm
import
on_gfx9
,
on_gfx12x
is_rocm_on_gfx9
=
on_gfx9
()
is_rocm_on_gfx12x
=
on_gfx12x
()
else
:
is_rocm_on_gfx9
=
False
is_rocm_on_gfx12x
=
False
device_supports_fp8
=
(
is_rocm_on_gfx9
or
is_rocm_on_gfx12x
or
(
p
.
is_cuda
()
and
p
.
has_device_capability
((
8
,
9
)))
or
p
.
is_xpu
()
)
if
not
device_supports_fp8
:
return
(
weight_key
,
activation_key
)
==
(
None
,
None
)
return
(
weight_key
,
activation_key
)
==
(
None
,
None
)
SUPPORTED_W_A
=
[
SUPPORTED_W_A
=
[
...
...
vllm/platforms/rocm.py
View file @
a4905133
...
@@ -800,7 +800,7 @@ class RocmPlatform(Platform):
...
@@ -800,7 +800,7 @@ class RocmPlatform(Platform):
@
classmethod
@
classmethod
def
supports_fp8
(
cls
)
->
bool
:
def
supports_fp8
(
cls
)
->
bool
:
return
any
(
gfx
in
_GCN_ARCH
for
gfx
in
[
"gfx94"
,
"gfx95"
,
"
gfx12
"
]
)
return
on_gfx9
()
or
on_
gfx12
x
(
)
@
classmethod
@
classmethod
def
is_fp8_fnuz
(
cls
)
->
bool
:
def
is_fp8_fnuz
(
cls
)
->
bool
:
...
...
vllm/platforms/xpu.py
View file @
a4905133
...
@@ -323,6 +323,10 @@ class XPUPlatform(Platform):
...
@@ -323,6 +323,10 @@ class XPUPlatform(Platform):
)
)
return
"vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"
# noqa
return
"vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"
# noqa
@
classmethod
def
supports_fp8
(
cls
)
->
bool
:
return
True
@
classmethod
@
classmethod
def
get_default_ir_op_priority
(
def
get_default_ir_op_priority
(
cls
,
vllm_config
:
"VllmConfig"
cls
,
vllm_config
:
"VllmConfig"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment