Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
15f34013
"examples/vscode:/vscode.git/clone" did not exist on "a0bf5daa6576a6899b933b11374a9864a1305051"
Unverified
Commit
15f34013
authored
Jun 23, 2025
by
Charles Chen
Committed by
GitHub
Jun 23, 2025
Browse files
Fix MTP with Deepseek R1 Fp4 (#7376)
parent
d04163b3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
1 deletion
+20
-1
python/sglang/srt/layers/moe/fused_moe_triton/layer.py
python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+6
-0
python/sglang/srt/models/deepseek_nextn.py
python/sglang/srt/models/deepseek_nextn.py
+6
-0
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+8
-1
No files found.
python/sglang/srt/layers/moe/fused_moe_triton/layer.py
View file @
15f34013
...
...
@@ -330,6 +330,12 @@ class FusedMoE(torch.nn.Module):
self
.
tp_rank
=
get_tensor_model_parallel_rank
()
self
.
num_experts
=
num_experts
self
.
expert_map
=
None
if
enable_flashinfer_moe
and
quant_config
is
None
:
logger
.
warning
(
"Disable flashinfer MoE when quantization config is None."
)
enable_flashinfer_moe
=
False
enable_ep_moe
=
False
self
.
enable_flashinfer_moe
=
enable_flashinfer_moe
if
enable_ep_moe
:
assert
(
...
...
python/sglang/srt/models/deepseek_nextn.py
View file @
15f34013
...
...
@@ -44,6 +44,12 @@ class DeepseekModelNextN(nn.Module):
prefix
:
str
=
""
,
)
->
None
:
super
().
__init__
()
if
quant_config
is
not
None
and
quant_config
.
get_name
()
==
"modelopt_fp4"
:
logger
.
warning
(
"Overriding DeepseekV3ForCausalLMNextN quant config for modelopt_fp4 Deepseek model."
)
quant_config
=
None
self
.
vocab_size
=
config
.
vocab_size
self
.
embed_tokens
=
VocabParallelEmbedding
(
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
15f34013
...
...
@@ -2201,7 +2201,7 @@ class DeepseekV2ForCausalLM(nn.Module):
q_a_proj_weight
=
cached_a_proj
[
q_a_proj_name
]
kv_a_proj_weight
=
cached_a_proj
[
kv_a_proj_name
]
cat_dim
=
0
if
(
if
self
.
quant_config
is
not
None
and
(
self
.
quant_config
.
get_name
()
==
"awq"
or
self
.
quant_config
.
get_name
()
==
"moe_wna16"
):
...
...
@@ -2232,6 +2232,13 @@ class DeepseekV2ForCausalLM(nn.Module):
for
scale
in
[
"k_scale"
,
"v_scale"
]:
if
scale
in
name
:
name
=
name
.
replace
(
f
"
{
scale
[
0
]
}
_proj"
,
"attn_mqa"
)
break
if
name
not
in
params_dict
:
# The modelopt checkpoint contains weights that the MTP module does not need:
# model.decoder.self_attn.attn_mqa.v_scale and
# model.decoder.self_attn.attn_mqa.k_scale
logger
.
warning
(
f
"
{
name
}
not found in params_dict."
)
continue
param
=
params_dict
[
name
]
weight_loader
=
getattr
(
param
,
"weight_loader"
,
default_weight_loader
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment