Commit 0ea330ca (unverified)
Authored Jun 03, 2025 by fzyzcjy; committed by GitHub Jun 02, 2025
Parent: 27e327b4

Fix wrong weight reference in dynamic EPLB (#6818)

Showing 3 changed files with 27 additions and 13 deletions (+27 -13):
python/sglang/srt/models/deepseek_v2.py   +13 -8
python/sglang/srt/models/qwen3_moe.py     +1 -5
python/sglang/srt/utils.py                +13 -0
python/sglang/srt/models/deepseek_v2.py

@@ -91,6 +91,7 @@ from sglang.srt.two_batch_overlap import (
 from sglang.srt.utils import (
     BumpAllocator,
     DeepEPMode,
+    LazyValue,
     add_prefix,
     bind_or_assign,
     get_bool_env_var,

@@ -1661,6 +1662,18 @@ class DeepseekV2ForCausalLM(nn.Module):
         self.logits_processor = LogitsProcessor(config)
         self.dp_size = get_local_attention_dp_size()

+        self._routed_experts_weights_of_layer = LazyValue(
+            lambda: {
+                layer_id: layer.mlp.get_moe_weights()
+                for layer_id, layer in enumerate(self.model.layers)
+                if isinstance(layer.mlp, DeepseekV2MoE)
+            }
+        )
+
+    @property
+    def routed_experts_weights_of_layer(self):
+        return self._routed_experts_weights_of_layer.value
+
     def determine_n_share_experts_fusion(
         self, architecture: str = "DeepseekV3ForCausalLM"
     ):

@@ -1873,14 +1886,6 @@ class DeepseekV2ForCausalLM(nn.Module):
                 self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
                 self_attn.use_deep_gemm_bmm = True

-        # TODO support nextn later
-        if not is_nextn:
-            self.routed_experts_weights_of_layer = {
-                layer_id: layer.mlp.get_moe_weights()
-                for layer_id, layer in enumerate(self.model.layers)
-                if isinstance(layer.mlp, DeepseekV2MoE)
-            }
-
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=False):
         if is_nextn:
             if hasattr(self.config, "num_nextn_predict_layers"):
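Read as a whole, the deepseek_v2.py change replaces the routed-experts weight dict that was previously built once at the end of weight loading (and skipped entirely when is_nextn was set) with a LazyValue-backed property that builds the mapping on first access. A plausible reading of the fix, inferred from the diff rather than stated in the commit message: with dynamic EPLB the expert weight tensors can be rebound after loading, so a dict captured at load time may keep references to stale tensor objects. The toy sketch below illustrates that failure mode; ToyLayer and the explicit rebinding step are hypothetical and not part of sglang.

# Toy illustration of the stale-reference problem (ToyLayer is hypothetical).
import torch


class ToyLayer:
    def __init__(self):
        self.weight = torch.zeros(2)

    def get_moe_weights(self):
        return [self.weight]


layers = [ToyLayer()]

# Old behaviour: capture the expert weights eagerly at load time.
eager = {i: layer.get_moe_weights() for i, layer in enumerate(layers)}

# Later, something rebinds the weight attribute to a new tensor object
# (standing in for a dynamic EPLB rebalance).
layers[0].weight = torch.ones(2)

# New behaviour: build the mapping only when it is first needed.
lazy = {i: layer.get_moe_weights() for i, layer in enumerate(layers)}

print(eager[0][0])  # tensor([0., 0.])  -> stale reference to the old tensor
print(lazy[0][0])   # tensor([1., 1.])  -> sees the currently bound tensor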
python/sglang/srt/models/qwen3_moe.py

@@ -18,15 +18,10 @@
 """Inference-only Qwen3MoE model compatible with HuggingFace weights."""

 import logging
-from dataclasses import dataclass
-from enum import Enum, auto
-from functools import partial
 from typing import Any, Dict, Iterable, Optional, Tuple

 import torch
-import torch.nn.functional as F
 from torch import nn
-from transformers.configuration_utils import PretrainedConfig

 from sglang.srt.distributed import (
     get_pp_group,

@@ -811,6 +806,7 @@ class Qwen3MoeForCausalLM(nn.Module):
             else:
                 logger.warning(f"Parameter {name} not found in params_dict")

+        # TODO mimic deepseek
         self.routed_experts_weights_of_layer = {
             layer_id: self.model.layers[layer_id].mlp.get_moe_weights()
             for layer_id in range(self.start_layer, self.end_layer)
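For Qwen3MoE the commit only trims unused imports and leaves a "# TODO mimic deepseek" marker: the mapping is still built eagerly at the end of load_weights over the pipeline-parallel range [start_layer, end_layer). A hedged sketch of what that TODO might look like, modelled on the DeepseekV2 change above rather than taken from the commit:

# Hypothetical follow-up, not part of this commit: defer the Qwen3MoE mapping
# with the LazyValue helper that this commit adds to sglang.srt.utils.
from sglang.srt.utils import LazyValue


class Qwen3MoeForCausalLMSketch:
    """Illustrative scaffolding only; attribute names follow the diff."""

    def __init__(self, model, start_layer: int, end_layer: int):
        self.model = model
        self.start_layer = start_layer
        self.end_layer = end_layer
        # Built on first access instead of eagerly inside load_weights.
        self._routed_experts_weights_of_layer = LazyValue(
            lambda: {
                layer_id: self.model.layers[layer_id].mlp.get_moe_weights()
                for layer_id in range(self.start_layer, self.end_layer)
            }
        )

    @property
    def routed_experts_weights_of_layer(self):
        return self._routed_experts_weights_of_layer.value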
python/sglang/srt/utils.py

@@ -2257,3 +2257,16 @@ except:

 def cpu_has_amx_support():
     return torch._C._cpu._is_amx_tile_supported() and is_intel_amx_backend_available
+
+
+class LazyValue:
+    def __init__(self, creator: Callable):
+        self._creator = creator
+        self._value = None
+
+    @property
+    def value(self):
+        if self._creator is not None:
+            self._value = self._creator()
+            self._creator = None
+        return self._value
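The new LazyValue helper computes its value once, on the first access to .value, caches the result, and then drops the creator so anything captured by the closure can be released. Because the "already computed" flag is the creator itself rather than the cached value, a creator that legitimately returns None is still only called once. A minimal usage sketch, independent of the models above:

# Minimal usage sketch; the LazyValue definition is copied from the diff above.
from typing import Callable


class LazyValue:
    def __init__(self, creator: Callable):
        self._creator = creator
        self._value = None

    @property
    def value(self):
        if self._creator is not None:
            self._value = self._creator()
            self._creator = None
        return self._value


build_count = 0


def build_mapping():
    global build_count
    build_count += 1
    return {"layer_0": "weights"}


lazy = LazyValue(build_mapping)
print(build_count)  # 0 -- nothing is built at construction time
print(lazy.value)   # {'layer_0': 'weights'} -- built here, on first access
print(lazy.value)   # same object, served from the cache
print(build_count)  # 1 -- the creator ran exactly once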