Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
baee0860
Unverified
Commit
baee0860
authored
Oct 05, 2025
by
Bowen Bao
Committed by
GitHub
Oct 05, 2025
Browse files
[quantization] Enable aiter mxfp4 fused_moe for Quark (#10048)
Co-authored-by:
HaiShaw
<
hixiao@gmail.com
>
parent
c7a104c1
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
1 deletion
+7
-1
python/sglang/srt/layers/quantization/quark/quark_moe.py
python/sglang/srt/layers/quantization/quark/quark_moe.py
+7
-1
No files found.
python/sglang/srt/layers/quantization/quark/quark_moe.py
View file @
baee0860
...
...
@@ -12,7 +12,7 @@ from aiter.utility.fp4_utils import e8m0_shuffle
from
sglang.srt.layers.moe
import
MoeRunnerConfig
from
sglang.srt.layers.quantization.base_config
import
FusedMoEMethodBase
from
sglang.srt.utils
import
get_bool_env_var
,
mxfp_supported
,
set_weight_attrs
from
sglang.srt.utils
import
get_bool_env_var
,
is_hip
,
mxfp_supported
,
set_weight_attrs
if
TYPE_CHECKING
:
from
sglang.srt.layers.moe.token_dispatcher
import
(
...
...
@@ -23,6 +23,8 @@ if TYPE_CHECKING:
logger
=
logging
.
getLogger
(
__name__
)
_is_hip
=
is_hip
()
__all__
=
[
"QuarkMoEMethod"
,
"QuarkW4A4MXFp4MoEMethod"
]
OCP_MX_BLOCK_SIZE
=
32
...
...
@@ -182,6 +184,10 @@ class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod):
topk_output
=
dispatch_output
.
topk_output
moe_runner_config
=
self
.
moe_runner_config
topk_weights
,
topk_ids
,
_
=
topk_output
if
_is_hip
:
topk_weights
=
topk_weights
.
to
(
torch
.
float32
)
# aiter's moe_sorting requires topk_weights to be FP32
if
hasattr
(
torch
,
"float4_e2m1fn_x2"
):
w13_weight
=
layer
.
w13_weight
.
view
(
torch
.
float4_e2m1fn_x2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment