Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
83087247
Unverified
Commit
83087247
authored
Oct 28, 2025
by
Cheng Wan
Committed by
GitHub
Oct 28, 2025
Browse files
[hotfix] missing `w13_weight_fp8` and `w2_weight_fp8` in UE8M0 requantization (#12259)
parent
334543ff
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
30 deletions
+30
-30
python/sglang/srt/layers/moe/ep_moe/layer.py
python/sglang/srt/layers/moe/ep_moe/layer.py
+0
-17
python/sglang/srt/layers/moe/moe_runner/deep_gemm.py
python/sglang/srt/layers/moe/moe_runner/deep_gemm.py
+8
-7
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+20
-4
python/sglang/srt/models/longcat_flash.py
python/sglang/srt/models/longcat_flash.py
+2
-2
No files found.
python/sglang/srt/layers/moe/ep_moe/layer.py
View file @
83087247
...
...
(reconstructed unified diff — token-per-line extraction repaired; polarity inferred from the "+0 −17" count for this file)

@@ -131,23 +131,6 @@ class DeepEPMoE(FusedMoE):
             )
             # the last one is invalid rank_id
             self.expert_mask[:-1] = 1
-        elif not _is_npu:
-            self.w13_weight_fp8 = (
-                self.w13_weight,
-                (
-                    self.w13_weight_scale_inv
-                    if self.use_block_quant or self.use_w4afp8
-                    else self.w13_weight_scale
-                ),
-            )
-            self.w2_weight_fp8 = (
-                self.w2_weight,
-                (
-                    self.w2_weight_scale_inv
-                    if self.use_block_quant or self.use_w4afp8
-                    else self.w2_weight_scale
-                ),
-            )

     def forward(
         self,
...
...
python/sglang/srt/layers/moe/moe_runner/deep_gemm.py
View file @
83087247
...
...
(reconstructed unified diff — token-per-line extraction repaired; polarity inferred from the "+8 −7" count: the shape assertion is moved inside a `dtype != torch.int` guard together with the E8M0 cast)

@@ -227,15 +227,16 @@ class DeepGemmRunnerCore(MoeRunnerCore):
         hidden_states_device = running_state["hidden_states_device"]

-        if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0:
-            b, s_mn, s_k = hidden_states_scale.shape
-            assert (
-                s_mn % 4 == 0 and s_k % 4 == 0
-            ), f"scales must be aligned to 4, but got ({b}, {s_mn}, {s_k})"
-
         # GroupGemm-0
         if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0:
-            hidden_states_scale = _cast_to_e8m0_with_rounding_up(hidden_states_scale)
+            if hidden_states_scale.dtype != torch.int:
+                b, s_mn, s_k = hidden_states_scale.shape
+                assert (
+                    s_mn % 4 == 0 and s_k % 4 == 0
+                ), f"scales must be aligned to 4, but got ({b}, {s_mn}, {s_k})"
+                hidden_states_scale = _cast_to_e8m0_with_rounding_up(
+                    hidden_states_scale
+                )
         else:
             hidden_states_scale = deep_gemm_wrapper.get_mn_major_tma_aligned_tensor(
                 hidden_states_scale
...
python/sglang/srt/models/deepseek_v2.py
View file @
83087247
...
...
(reconstructed unified diff — token-per-line extraction repaired; the `experts.*_fp8` attributes are replaced by explicit (weight, scale_inv) tuples, per the commit message about the missing `w13_weight_fp8`/`w2_weight_fp8` attributes)

@@ -3289,8 +3289,8 @@ class DeepseekV2ForCausalLM(nn.Module):
             experts = layer.mlp.experts
             if isinstance(experts, DeepEPMoE):
                 for w in [
-                    experts.w13_weight_fp8,
-                    experts.w2_weight_fp8,
+                    (experts.w13_weight, experts.w13_weight_scale_inv),
+                    (experts.w2_weight, experts.w2_weight_scale_inv),
                 ]:
                     requant_weight_ue8m0_inplace(w[0], w[1], weight_block_size)
             else:
...
(reconstructed unified diff — token-per-line extraction repaired; polarity inferred from the "+20 −4" count: local fp8 tuples with a `hasattr` scale fallback replace the removed `experts.*_fp8` attribute reads)

@@ -3338,10 +3338,26 @@ class DeepseekV2ForCausalLM(nn.Module):
             )
             experts = layer.mlp.experts
+            w13_weight_fp8 = (
+                experts.w13_weight,
+                (
+                    experts.w13_weight_scale_inv
+                    if hasattr(experts, "w13_weight_scale_inv")
+                    else experts.w13_weight_scale
+                ),
+            )
+            w2_weight_fp8 = (
+                experts.w2_weight,
+                (
+                    experts.w2_weight_scale_inv
+                    if hasattr(experts, "w2_weight_scale_inv")
+                    else experts.w2_weight_scale
+                ),
+            )
             if isinstance(experts, DeepEPMoE):
                 for w in [
-                    experts.w13_weight_fp8,
-                    experts.w2_weight_fp8,
+                    w13_weight_fp8,
+                    w2_weight_fp8,
                 ]:
                     transform_scale_ue8m0_inplace(w[1], mn=w[0].shape[-2])
...
...
python/sglang/srt/models/longcat_flash.py
View file @
83087247
...
...
(reconstructed unified diff — token-per-line extraction repaired; same +2/−2 substitution as the deepseek_v2 requant loop: explicit (weight, scale_inv) tuples replace the `experts.*_fp8` attribute reads)

@@ -821,8 +821,8 @@ class LongcatFlashForCausalLM(nn.Module):
             experts = layer.mlp.experts
             if isinstance(experts, DeepEPMoE):
                 for w in [
-                    experts.w13_weight_fp8,
-                    experts.w2_weight_fp8,
+                    (experts.w13_weight, experts.w13_weight_scale_inv),
+                    (experts.w2_weight, experts.w2_weight_scale_inv),
                 ]:
                     requant_weight_ue8m0_inplace(w[0], w[1], weight_block_size)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment