Commit 499f5e62 (unverified), authored Jun 04, 2025 by Cheng Wan, committed by GitHub Jun 04, 2025
Fix one missing arg in DeepEP (#6878)
Parent: 81964328
Showing 1 changed file with 22 additions and 19 deletions
python/sglang/srt/layers/moe/ep_moe/layer.py (+22, -19)

@@ -180,6 +180,9 @@ class EPMoE(torch.nn.Module):
         self.layer_id = layer_id
         self.num_experts = num_experts
         assert self.num_experts % self.tp_size == 0
+        assert (
+            num_fused_shared_experts == 0
+        ), "num_fused_shared_experts is not supported in EP"
         self.num_experts_per_partition = self.num_experts // self.tp_size
         self.start_expert_id = self.tp_rank * self.num_experts_per_partition
         self.end_expert_id = self.start_expert_id + self.num_experts_per_partition - 1

@@ -191,7 +194,6 @@ class EPMoE(torch.nn.Module):
         if self.use_grouped_topk:
             assert num_expert_group is not None and topk_group is not None
         self.num_expert_group = num_expert_group
-        self.num_fused_shared_experts = num_fused_shared_experts
         self.topk_group = topk_group
         self.correction_bias = correction_bias
         self.custom_routing_function = custom_routing_function

@@ -252,7 +254,6 @@ class EPMoE(torch.nn.Module):
             renormalize=self.renormalize,
             topk_group=self.topk_group,
             num_expert_group=self.num_expert_group,
-            num_fused_shared_experts=self.num_fused_shared_experts,
             correction_bias=self.correction_bias,
             custom_routing_function=self.custom_routing_function,
             routed_scaling_factor=self.routed_scaling_factor,

@@ -886,6 +887,7 @@ class DeepEPMoE(EPMoE):
         renormalize: bool = True,
         use_grouped_topk: bool = False,
         num_expert_group: Optional[int] = None,
+        num_fused_shared_experts: int = 0,
         topk_group: Optional[int] = None,
         quant_config: Optional[QuantizationConfig] = None,
         tp_size: Optional[int] = None,

@@ -897,23 +899,24 @@ class DeepEPMoE(EPMoE):
         deepep_mode: DeepEPMode = DeepEPMode.auto,
     ):
         super().__init__(
-            num_experts,
-            top_k,
-            hidden_size,
-            intermediate_size,
-            layer_id,
-            params_dtype,
-            renormalize,
-            use_grouped_topk,
-            num_expert_group,
-            topk_group,
-            quant_config,
-            tp_size,
-            prefix,
-            correction_bias,
-            custom_routing_function,
-            activation,
-            routed_scaling_factor,
+            num_experts=num_experts,
+            top_k=top_k,
+            hidden_size=hidden_size,
+            intermediate_size=intermediate_size,
+            layer_id=layer_id,
+            params_dtype=params_dtype,
+            renormalize=renormalize,
+            use_grouped_topk=use_grouped_topk,
+            num_expert_group=num_expert_group,
+            num_fused_shared_experts=num_fused_shared_experts,
+            topk_group=topk_group,
+            quant_config=quant_config,
+            tp_size=tp_size,
+            prefix=prefix,
+            correction_bias=correction_bias,
+            custom_routing_function=custom_routing_function,
+            activation=activation,
+            routed_scaling_factor=routed_scaling_factor,
         )
         self.deepep_mode = deepep_mode
         if self.deepep_mode.enable_low_latency():
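For context, a minimal sketch of the failure mode this commit addresses: when a new parameter (here num_fused_shared_experts) is inserted into the parent __init__ signature, a subclass that calls super().__init__() positionally silently shifts every later argument into the wrong slot; switching to keyword arguments, as the diff does for DeepEPMoE, keeps each value bound to the intended parameter. The classes and values below are simplified stand-ins for illustration, not the sglang code itself.

class Parent:
    def __init__(
        self,
        num_expert_group=None,
        num_fused_shared_experts=0,  # newly inserted parameter
        topk_group=None,
        quant_config=None,
    ):
        self.num_expert_group = num_expert_group
        self.num_fused_shared_experts = num_fused_shared_experts
        self.topk_group = topk_group
        self.quant_config = quant_config


class ChildPositional(Parent):
    def __init__(self, num_expert_group, topk_group, quant_config):
        # Positional call written against the old signature: after
        # num_fused_shared_experts was inserted, topk_group lands in that
        # slot and every later argument shifts by one.
        super().__init__(num_expert_group, topk_group, quant_config)


class ChildKeyword(Parent):
    def __init__(self, num_expert_group, topk_group, quant_config):
        # Keyword call, as in the commit: each value reaches the intended
        # parameter no matter where new parameters are inserted later.
        super().__init__(
            num_expert_group=num_expert_group,
            topk_group=topk_group,
            quant_config=quant_config,
        )


broken = ChildPositional(num_expert_group=8, topk_group=4, quant_config="cfg")
fixed = ChildKeyword(num_expert_group=8, topk_group=4, quant_config="cfg")
print(broken.num_fused_shared_experts, broken.topk_group)  # -> 4 cfg (misaligned)
print(fixed.num_fused_shared_experts, fixed.topk_group)    # -> 0 4 (correct)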