Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
69dd878b
"git@developer.sourcefind.cn:change/sglang.git" did not exist on "e7bc600304e98fa54184f4d7331b4e68016890b4"
Unverified
Commit
69dd878b
authored
May 30, 2025
by
Li Hui
Committed by
GitHub
May 30, 2025
Browse files
Fix shared experts fusion error (#6289)
parent
22630ca2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
12 deletions
+38
-12
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+38
-12
No files found.
python/sglang/srt/models/deepseek_v2.py
View file @
69dd878b
...
@@ -1903,23 +1903,49 @@ class DeepseekV2ForCausalLM(nn.Module):
...
@@ -1903,23 +1903,49 @@ class DeepseekV2ForCausalLM(nn.Module):
if
self
.
n_share_experts_fusion
>
0
:
if
self
.
n_share_experts_fusion
>
0
:
weights_list
=
list
(
weights
)
weights_list
=
list
(
weights
)
weights_dict
=
dict
(
weights_list
)
weights_dict
=
dict
(
weights_list
)
if
self
.
quant_config
is
None
or
self
.
quant_config
.
get_name
()
==
"w8a8_int8"
:
if
self
.
quant_config
is
not
None
:
suffix_list
=
[
if
self
.
quant_config
.
get_name
()
==
"w8a8_int8"
:
"down_proj.weight"
,
suffix_list
=
[
"down_proj.weight_scale"
,
"down_proj.weight"
,
"gate_proj.weight"
,
"down_proj.weight_scale"
,
"gate_proj.weight_scale"
,
"gate_proj.weight"
,
"up_proj.weight"
,
"gate_proj.weight_scale"
,
"up_proj.weight_scale"
,
"up_proj.weight"
,
]
"up_proj.weight_scale"
,
]
elif
(
self
.
quant_config
.
get_name
()
==
"fp8"
or
self
.
quant_config
.
get_name
()
==
"blockwise_int8"
):
suffix_list
=
[
"down_proj.weight"
,
"down_proj.weight_scale_inv"
,
"gate_proj.weight"
,
"gate_proj.weight_scale_inv"
,
"up_proj.weight"
,
"up_proj.weight_scale_inv"
,
]
elif
self
.
quant_config
.
get_name
()
==
"awq"
:
suffix_list
=
[
"down_proj.qweight"
,
"down_proj.qzeros"
,
"down_proj.scales"
,
"gate_proj.qweight"
,
"gate_proj.qzeros"
,
"gate_proj.scales"
,
"up_proj.qweight"
,
"up_proj.qzeros"
,
"up_proj.scales"
,
]
else
:
raise
ValueError
(
f
"Unsupported shared expert fusion for quantization:
{
self
.
quant_config
.
get_name
()
}
."
)
else
:
else
:
suffix_list
=
[
suffix_list
=
[
"down_proj.weight"
,
"down_proj.weight"
,
"down_proj.weight_scale_inv"
,
"gate_proj.weight"
,
"gate_proj.weight"
,
"gate_proj.weight_scale_inv"
,
"up_proj.weight"
,
"up_proj.weight"
,
"up_proj.weight_scale_inv"
,
]
]
names_to_remove
=
[]
names_to_remove
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment