sglang · Commit 0ea330ca (unverified)

Fix wrong weight reference in dynamic EPLB (#6818)

Authored by fzyzcjy on Jun 03, 2025; committed via GitHub on Jun 02, 2025.
Parent: 27e327b4
Showing 3 changed files with 27 additions and 13 deletions.
python/sglang/srt/models/deepseek_v2.py  (+13, -8)
python/sglang/srt/models/qwen3_moe.py    (+1, -5)
python/sglang/srt/utils.py               (+13, -0)
python/sglang/srt/models/deepseek_v2.py
@@ -91,6 +91,7 @@ from sglang.srt.two_batch_overlap import (

from sglang.srt.utils import (
    BumpAllocator,
    DeepEPMode,
    LazyValue,
    add_prefix,
    bind_or_assign,
    get_bool_env_var,
@@ -1661,6 +1662,18 @@ class DeepseekV2ForCausalLM(nn.Module):

        self.logits_processor = LogitsProcessor(config)
        self.dp_size = get_local_attention_dp_size()

        self._routed_experts_weights_of_layer = LazyValue(
            lambda: {
                layer_id: layer.mlp.get_moe_weights()
                for layer_id, layer in enumerate(self.model.layers)
                if isinstance(layer.mlp, DeepseekV2MoE)
            }
        )

    @property
    def routed_experts_weights_of_layer(self):
        return self._routed_experts_weights_of_layer.value

    def determine_n_share_experts_fusion(
        self, architecture: str = "DeepseekV3ForCausalLM"
    ):
@@ -1873,14 +1886,6 @@ class DeepseekV2ForCausalLM(nn.Module):

                    self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
                    self_attn.use_deep_gemm_bmm = True

        # TODO support nextn later
        if not is_nextn:
            self.routed_experts_weights_of_layer = {
                layer_id: layer.mlp.get_moe_weights()
                for layer_id, layer in enumerate(self.model.layers)
                if isinstance(layer.mlp, DeepseekV2MoE)
            }

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=False):
        if is_nextn:
            if hasattr(self.config, "num_nextn_predict_layers"):
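One plausible reading of this change: the eager dict that used to be built inside the weight-loading path captured whatever tensors get_moe_weights() returned at that moment, so any later rebinding of parameters (for example the bind_or_assign(..., w_vc.contiguous()) step above, or a subsequent reload) could leave dynamic EPLB holding stale references. Wrapping the dict in a LazyValue behind a property defers the capture to first use. Below is a minimal toy sketch of the stale-reference hazard, using made-up classes rather than sglang code:

import torch


class ToyMoE:
    # Stand-in for an MoE block; not the real DeepseekV2MoE.
    def __init__(self):
        self.w = torch.zeros(2, 2)

    def get_moe_weights(self):
        return [self.w]


mlp = ToyMoE()

# Eager capture: the dict holds a reference to the tensor that exists right now.
eager = {0: mlp.get_moe_weights()}

# Later weight processing rebinds the parameter to a new tensor
# (analogous to rebinding attention weights with bind_or_assign / .contiguous()).
mlp.w = torch.ones(2, 2)

# Deferred capture: built only when first needed, so it sees the final tensor.
deferred = {0: mlp.get_moe_weights()}

print(eager[0][0].sum().item())     # 0.0 -- stale reference to the old zeros tensor
print(deferred[0][0].sum().item())  # 4.0 -- the tensor the model actually uses now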
python/sglang/srt/models/qwen3_moe.py
@@ -18,15 +18,10 @@

"""Inference-only Qwen3MoE model compatible with HuggingFace weights."""

import logging
from dataclasses import dataclass
from enum import Enum, auto
from functools import partial
from typing import Any, Dict, Iterable, Optional, Tuple

import torch
import torch.nn.functional as F
from torch import nn
from transformers.configuration_utils import PretrainedConfig

from sglang.srt.distributed import (
    get_pp_group,
@@ -811,6 +806,7 @@ class Qwen3MoeForCausalLM(nn.Module):

            else:
                logger.warning(f"Parameter {name} not found in params_dict")

        # TODO mimic deepseek
        self.routed_experts_weights_of_layer = {
            layer_id: self.model.layers[layer_id].mlp.get_moe_weights()
            for layer_id in range(self.start_layer, self.end_layer)
        }
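For Qwen3 the mapping is still built eagerly at the end of load_weights, but only over the layer range this pipeline-parallel rank owns (start_layer to end_layer). A toy sketch of that shape, with made-up layer objects and an arbitrary rank slice:

class ToyLayerMLP:
    # Stand-in for a Qwen3 MoE block; not sglang code.
    def __init__(self, layer_id):
        self.layer_id = layer_id

    def get_moe_weights(self):
        return [f"moe-weights-of-layer-{self.layer_id}"]


layers = [ToyLayerMLP(i) for i in range(8)]
start_layer, end_layer = 2, 6  # hypothetical slice owned by this rank

routed_experts_weights_of_layer = {
    layer_id: layers[layer_id].get_moe_weights()
    for layer_id in range(start_layer, end_layer)
}

print(sorted(routed_experts_weights_of_layer))  # [2, 3, 4, 5]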
python/sglang/srt/utils.py
@@ -2257,3 +2257,16 @@ except:

    def cpu_has_amx_support():
        return torch._C._cpu._is_amx_tile_supported() and is_intel_amx_backend_available


class LazyValue:
    def __init__(self, creator: Callable):
        self._creator = creator
        self._value = None

    @property
    def value(self):
        if self._creator is not None:
            self._value = self._creator()
            self._creator = None
        return self._value
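As a quick illustration of how the new helper behaves (a standalone check, assuming sglang is importable; LazyValue lives in python/sglang/srt/utils.py as shown above): the creator runs exactly once, on first access, and the result is cached afterwards.

from sglang.srt.utils import LazyValue

calls = []
lazy = LazyValue(lambda: calls.append("built") or 42)

print(lazy.value)  # 42 -- the creator runs here, on first access
print(lazy.value)  # 42 -- cached; the creator is not called again
print(calls)       # ['built']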