sglang · Commit 2df9d40a (Unverified)
Authored May 17, 2025 by fzyzcjy · Committed by GitHub, May 16, 2025
Minor code cleanup refactor for DeepSeek models (#6324)
Parent: 8dc191f2
Showing 2 changed files with 26 additions and 35 deletions (+26, −35).
python/sglang/srt/layers/moe/ep_moe/layer.py    +10 −1
python/sglang/srt/models/deepseek_v2.py         +16 −34
python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -5,6 +5,7 @@ import torch
 from torch.nn import Module

 from sglang.srt.layers.quantization.deep_gemm import _ENABLE_JIT_DEEPGEMM
+from sglang.srt.managers.schedule_batch import global_server_args_dict

 try:
     from deep_gemm import (
@@ -40,7 +41,7 @@ from sglang.srt.layers.moe.ep_moe.kernels import (
     tma_align_input_scale,
 )
 from sglang.srt.layers.moe.fused_moe_triton import FusedMoeWeightScaleSupported
-from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoEMethodBase
+from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE, FusedMoEMethodBase
 from sglang.srt.layers.moe.topk import select_experts
 from sglang.srt.layers.quantization.base_config import (
     QuantizationConfig,
@@ -1173,3 +1174,11 @@ class DeepEPMoE(EPMoE):
         )

         return down_output
+
+
+def get_moe_impl_class():
+    if global_server_args_dict["enable_deepep_moe"]:
+        return DeepEPMoE
+    if global_server_args_dict["enable_ep_moe"]:
+        return EPMoE
+    return FusedMoE
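Note: the new module-level helper centralizes the MoE-implementation choice that was previously duplicated as inline conditionals at each call site in deepseek_v2.py. As a rough illustration of the precedence it encodes (DeepEP first, then plain EP, then the Triton fused fallback), here is a standalone sketch with stub classes and a plain dict standing in for sglang's global_server_args_dict; it is not the real layer code.

# Standalone sketch of the dispatch precedence behind get_moe_impl_class().
# The stub classes and the plain-dict "server args" are illustrative stand-ins,
# not sglang's real DeepEPMoE / EPMoE / FusedMoE layers.

class FusedMoE:  # default Triton fused path (stub)
    pass

class EPMoE(FusedMoE):  # expert-parallel path (stub)
    pass

class DeepEPMoE(EPMoE):  # DeepEP-based path (stub)
    pass

def pick_moe_impl(server_args: dict) -> type:
    # Mirrors the helper's branching: DeepEP wins over EP; FusedMoE is the fallback.
    if server_args.get("enable_deepep_moe"):
        return DeepEPMoE
    if server_args.get("enable_ep_moe"):
        return EPMoE
    return FusedMoE

assert pick_moe_impl({"enable_deepep_moe": True, "enable_ep_moe": True}) is DeepEPMoE
assert pick_moe_impl({"enable_ep_moe": True}) is EPMoE
assert pick_moe_impl({}) is FusedMoE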
python/sglang/srt/models/deepseek_v2.py
@@ -52,7 +52,7 @@ from sglang.srt.layers.linear import (
     RowParallelLinear,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessor
-from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
+from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE, get_moe_impl_class
 from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
 from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
 from sglang.srt.layers.moe.topk import select_experts
@@ -222,13 +222,7 @@ class DeepseekV2MoE(nn.Module):
         self.gate = MoEGate(config=config, prefix=add_prefix("gate", prefix))

-        MoEImpl = (
-            DeepEPMoE
-            if global_server_args_dict["enable_deepep_moe"]
-            else (EPMoE if global_server_args_dict["enable_ep_moe"] else FusedMoE)
-        )
-        self.experts = MoEImpl(
+        self.experts = get_moe_impl_class()(
             num_experts=config.n_routed_experts + self.n_share_experts_fusion,
             top_k=config.num_experts_per_tok + min(self.n_share_experts_fusion, 1),
             hidden_size=config.hidden_size,
@@ -251,26 +245,19 @@ class DeepseekV2MoE(nn.Module):
         if config.n_shared_experts is not None and self.n_share_experts_fusion == 0:
             intermediate_size = config.moe_intermediate_size * config.n_shared_experts
             # disable tp for shared experts when enable deepep moe
-            if not global_server_args_dict["enable_deepep_moe"]:
-                self.shared_experts = DeepseekV2MLP(
-                    hidden_size=config.hidden_size,
-                    intermediate_size=intermediate_size,
-                    hidden_act=config.hidden_act,
-                    quant_config=quant_config,
-                    reduce_results=False,
-                    prefix=add_prefix("shared_experts", prefix),
-                )
-            else:
-                self.shared_experts = DeepseekV2MLP(
-                    hidden_size=config.hidden_size,
-                    intermediate_size=intermediate_size,
-                    hidden_act=config.hidden_act,
-                    quant_config=quant_config,
-                    reduce_results=False,
-                    prefix=add_prefix("shared_experts", prefix),
-                    tp_rank=0,
-                    tp_size=1,
-                )
+            self.shared_experts = DeepseekV2MLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                reduce_results=False,
+                prefix=add_prefix("shared_experts", prefix),
+                **(
+                    dict(tp_rank=0, tp_size=1)
+                    if global_server_args_dict["enable_deepep_moe"]
+                    else {}
+                ),
+            )

         if global_server_args_dict["enable_deepep_moe"]:
             # TODO: we will support tp < ep in the future
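Note: the two nearly identical DeepseekV2MLP branches above collapse into a single call by splatting an optional keyword dict, so tp_rank=0, tp_size=1 is passed only when DeepEP MoE is enabled. A minimal, self-contained sketch of that conditional-**kwargs idiom follows; the function and flag below are illustrative stand-ins, not sglang's API.

# Sketch of the conditional **kwargs idiom: add extra keyword arguments only
# when a flag is set, instead of duplicating the whole constructor call.
from typing import Optional

def make_mlp(hidden_size: int,
             tp_rank: Optional[int] = None,
             tp_size: Optional[int] = None) -> dict:
    # Stand-in for DeepseekV2MLP(...): just records the arguments it received.
    return {"hidden_size": hidden_size, "tp_rank": tp_rank, "tp_size": tp_size}

enable_deepep_moe = True  # illustrative flag, mirrors global_server_args_dict

mlp = make_mlp(
    hidden_size=4096,
    **(dict(tp_rank=0, tp_size=1) if enable_deepep_moe else {}),
)
print(mlp)  # {'hidden_size': 4096, 'tp_rank': 0, 'tp_size': 1}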
@@ -1726,12 +1713,7 @@ class DeepseekV2ForCausalLM(nn.Module):
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        MoEImpl = (
-            DeepEPMoE
-            if global_server_args_dict["enable_deepep_moe"]
-            else (EPMoE if global_server_args_dict["enable_ep_moe"] else FusedMoE)
-        )
-        expert_params_mapping = MoEImpl.make_expert_params_mapping(
+        expert_params_mapping = get_moe_impl_class().make_expert_params_mapping(
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
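Note: because the selection helper returns the class itself rather than an instance, class-level utilities such as make_expert_params_mapping can be invoked directly on its result during weight loading, as above. A rough sketch of that factory-returns-a-class pattern, using stub types rather than sglang's real signatures:

# Stub sketch: call a classmethod on whichever class a factory function returns.
class StubMoEBase:
    @classmethod
    def make_expert_params_mapping(cls, **ckpt_names) -> list:
        # The real method builds (param_name, weight_name, expert_id, shard_id)
        # tuples; this stub only shows which class handled the call.
        return [(cls.__name__, ckpt_names)]

class StubFusedMoE(StubMoEBase):
    pass

class StubEPMoE(StubMoEBase):
    pass

def pick_stub_impl(enable_ep_moe: bool) -> type:
    return StubEPMoE if enable_ep_moe else StubFusedMoE

mapping = pick_stub_impl(enable_ep_moe=True).make_expert_params_mapping(
    ckpt_gate_proj_name="gate_proj",
    ckpt_down_proj_name="down_proj",
    ckpt_up_proj_name="up_proj",
)
print(mapping)  # [('StubEPMoE', {'ckpt_gate_proj_name': 'gate_proj', ...})]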