Commit 453511ac (unverified)
Save memory for expert model parallel (#9957)
Authored Sep 04, 2025 by Cheng Wan; committed via GitHub on Sep 04, 2025.
Parent: d0730487
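
The memory saving in this commit comes from reusing the existing tensor-parallel group for the MoE groups whenever their rank layout is identical, instead of always creating fresh communicators (see the diff below). A minimal standalone sketch of that decision with illustrative sizes; the function name and its return format are hypothetical, not sglang API:

def plan_moe_groups(tensor_model_parallel_size: int, expert_model_parallel_size: int) -> dict:
    # Mirrors the sizes computed in initialize_model_parallel.
    moe_ep_size = expert_model_parallel_size
    moe_tp_size = tensor_model_parallel_size // moe_ep_size
    return {
        # Reuse the existing TP group when the MoE group spans exactly the same ranks.
        "moe_ep": "alias _TP" if moe_ep_size == tensor_model_parallel_size else "new group",
        "moe_tp": "alias _TP" if moe_tp_size == tensor_model_parallel_size else "new group",
    }

print(plan_moe_groups(8, 8))  # {'moe_ep': 'alias _TP', 'moe_tp': 'new group'}
print(plan_moe_groups(8, 1))  # {'moe_ep': 'new group', 'moe_tp': 'alias _TP'}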
Showing 1 changed file with 35 additions and 29 deletions.

python/sglang/srt/distributed/parallel_state.py (+35, -29)
@@ -1458,10 +1458,15 @@ def initialize_model_parallel(
        _PDMUX_PREFILL_TP_GROUP.pynccl_comm.disabled = False

    moe_ep_size = expert_model_parallel_size
    moe_tp_size = tensor_model_parallel_size // moe_ep_size
    global _MOE_EP
    assert _MOE_EP is None, "expert model parallel group is already initialized"
    if moe_ep_size == tensor_model_parallel_size:
        _MOE_EP = _TP
    else:
        # TODO(ch-wan): use split_group to save memory
        group_ranks = []
        for i in range(num_tensor_model_parallel_groups):
            for j in range(moe_tp_size):
@@ -1469,17 +1474,20 @@ def initialize_model_parallel(
                en = (i + 1) * tensor_model_parallel_size + j
                ranks = list(range(st, en, moe_tp_size))
                group_ranks.append(ranks)
        _MOE_EP = init_model_parallel_group(
            group_ranks,
            get_world_group().local_rank,
            backend,
            use_custom_allreduce=False,
            group_name="moe_ep",
        )

    global _MOE_TP
    assert _MOE_TP is None, "expert model parallel group is already initialized"
    if moe_tp_size == tensor_model_parallel_size:
        _MOE_TP = _TP
    else:
        # TODO(ch-wan): use split_group to save memory
        group_ranks = []
        for i in range(num_tensor_model_parallel_groups):
            for j in range(moe_ep_size):
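
For the non-aliased case, each moe_ep group collects every moe_tp_size-th rank within a tensor-parallel group. A small standalone reproduction of that rank arithmetic; the st assignment is collapsed out of the visible hunks, so st = i * tensor_model_parallel_size + j is an assumption inferred from the en and range expressions shown above:

# Illustrative sizes, not taken from the commit.
tensor_model_parallel_size = 8
moe_ep_size = 4
moe_tp_size = tensor_model_parallel_size // moe_ep_size  # 2
num_tensor_model_parallel_groups = 2  # e.g. world size 16 with TP 8

group_ranks = []
for i in range(num_tensor_model_parallel_groups):
    for j in range(moe_tp_size):
        st = i * tensor_model_parallel_size + j          # assumed, see note above
        en = (i + 1) * tensor_model_parallel_size + j
        ranks = list(range(st, en, moe_tp_size))         # strided ranks
        group_ranks.append(ranks)

print(group_ranks)
# [[0, 2, 4, 6], [1, 3, 5, 7], [8, 10, 12, 14], [9, 11, 13, 15]]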
@@ -1487,12 +1495,10 @@ def initialize_model_parallel(
                en = i * tensor_model_parallel_size + (j + 1) * moe_tp_size
                ranks = list(range(st, en))
                group_ranks.append(ranks)
        _MOE_TP = init_model_parallel_group(
            group_ranks,
            get_world_group().local_rank,
            backend,
            use_custom_allreduce=False,
            group_name="moe_tp",
        )
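
The moe_tp groups, by contrast, are contiguous slices of each tensor-parallel group. A matching standalone illustration; here the elided st line is assumed to be st = i * tensor_model_parallel_size + j * moe_tp_size, inferred from the en expression in the hunk above:

# Same illustrative sizes as the moe_ep example.
tensor_model_parallel_size = 8
moe_ep_size = 4
moe_tp_size = tensor_model_parallel_size // moe_ep_size  # 2
num_tensor_model_parallel_groups = 2

group_ranks = []
for i in range(num_tensor_model_parallel_groups):
    for j in range(moe_ep_size):
        st = i * tensor_model_parallel_size + j * moe_tp_size        # assumed
        en = i * tensor_model_parallel_size + (j + 1) * moe_tp_size
        ranks = list(range(st, en))                                  # contiguous ranks
        group_ranks.append(ranks)

print(group_ranks)
# [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14, 15]]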