Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
a2ba0bc3
Unverified
Commit
a2ba0bc3
authored
Oct 20, 2025
by
Shangming Cai
Committed by
GitHub
Oct 20, 2025
Browse files
Tiny clean up for PD module and doc (#11747)
Signed-off-by:
Shangming Cai
<
csmthu@gmail.com
>
parent
6d2d0ce2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
5 deletions
+23
-5
docs/advanced_features/pd_disaggregation.md
docs/advanced_features/pd_disaggregation.md
+3
-0
python/sglang/srt/disaggregation/common/conn.py
python/sglang/srt/disaggregation/common/conn.py
+1
-0
python/sglang/srt/disaggregation/mooncake/conn.py
python/sglang/srt/disaggregation/mooncake/conn.py
+12
-5
python/sglang/srt/disaggregation/nixl/conn.py
python/sglang/srt/disaggregation/nixl/conn.py
+7
-0
No files found.
docs/advanced_features/pd_disaggregation.md
View file @
a2ba0bc3
...
...
@@ -41,6 +41,7 @@ uv pip install mooncake-transfer-engine
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
--disaggregation-mode
prefill
\
--port
30000
\
--disaggregation-ib-device
mlx5_roce0
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
...
...
@@ -179,6 +180,7 @@ pip install . --config-settings=setup-args="-Ducx_path=/path/to/ucx"
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
--disaggregation-mode
prefill
\
--port
30000
\
--disaggregation-transfer-backend
nixl
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
...
...
@@ -282,6 +284,7 @@ export ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE=true
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
--disaggregation-mode
prefill
\
--port
30000
\
--disaggregation-transfer-backend
ascend
python
-m
sglang.launch_server
\
--model-path
meta-llama/Llama-3.1-8B-Instruct
\
...
...
python/sglang/srt/disaggregation/common/conn.py
View file @
a2ba0bc3
...
...
@@ -246,6 +246,7 @@ class CommonKVReceiver(BaseKVReceiver):
f
"Could not fetch prefill parallel info from bootstrap_addr:
{
self
.
bootstrap_addr
}
"
,
)
self
.
kv_mgr
.
update_status
(
self
.
bootstrap_room
,
KVPoll
.
Failed
)
self
.
bootstrap_infos
=
None
return
else
:
logger
.
debug
(
...
...
python/sglang/srt/disaggregation/mooncake/conn.py
View file @
a2ba0bc3
...
...
@@ -174,7 +174,7 @@ class MooncakeKVManager(CommonKVManager):
cpu_count
=
os
.
cpu_count
()
transfer_thread_pool_size
=
get_int_env_var
(
"SGLANG_DISAGGREGATION_THREAD_POOL_SIZE"
,
min
(
max
(
4
,
int
(
0.
7
5
*
cpu_count
)
//
8
),
12
),
min
(
max
(
4
,
int
(
0.5
*
cpu_count
)
//
8
),
12
),
)
transfer_queue_size
=
get_int_env_var
(
"SGLANG_DISAGGREGATION_QUEUE_SIZE"
,
4
)
self
.
transfer_queues
:
List
[
FastQueue
]
=
[
...
...
@@ -190,9 +190,6 @@ class MooncakeKVManager(CommonKVManager):
)
for
_
in
range
(
transfer_queue_size
)
]
self
.
state_executors
=
concurrent
.
futures
.
ThreadPoolExecutor
(
transfer_thread_pool_size
//
transfer_queue_size
)
for
queue
,
executor
in
zip
(
self
.
transfer_queues
,
self
.
executors
):
threading
.
Thread
(
target
=
self
.
transfer_worker
,
args
=
(
queue
,
executor
),
daemon
=
True
...
...
@@ -641,6 +638,7 @@ class MooncakeKVManager(CommonKVManager):
req
:
TransferInfo
,
prefill_state_indices
:
list
[
int
],
dst_state_data_ptrs
:
list
[
int
],
executor
:
concurrent
.
futures
.
ThreadPoolExecutor
,
):
"""Send state or extra pool data with type-specific handling."""
state_type
=
getattr
(
self
.
kv_args
,
"state_type"
,
"none"
)
...
...
@@ -662,7 +660,7 @@ class MooncakeKVManager(CommonKVManager):
item_lens
=
self
.
kv_args
.
state_item_lens
,
prefill_data_indices
=
prefill_state_indices
,
dst_data_indices
=
dst_state_indices
,
executor
=
self
.
state_
executor
s
,
executor
=
executor
,
)
else
:
return
0
...
...
@@ -810,6 +808,7 @@ class MooncakeKVManager(CommonKVManager):
req
,
kv_chunk
.
state_indices
,
target_rank_registration_info
.
dst_state_data_ptrs
,
executor
,
)
if
self
.
pp_group
.
is_last_rank
:
...
...
@@ -1257,6 +1256,14 @@ class MooncakeKVReceiver(CommonKVReceiver):
aux_index
:
Optional
[
int
]
=
None
,
state_indices
:
Optional
[
List
[
int
]]
=
None
,
):
if
self
.
bootstrap_infos
is
None
:
self
.
kv_mgr
.
record_failure
(
self
.
bootstrap_room
,
f
"Could not fetch prefill parallel info from bootstrap_addr:
{
self
.
bootstrap_addr
}
"
,
)
self
.
kv_mgr
.
update_status
(
self
.
bootstrap_room
,
KVPoll
.
Failed
)
return
for
bootstrap_info
in
self
.
bootstrap_infos
:
sock
,
lock
=
self
.
_connect_to_bootstrap_server
(
bootstrap_info
)
is_dummy
=
bootstrap_info
[
"is_dummy"
]
...
...
python/sglang/srt/disaggregation/nixl/conn.py
View file @
a2ba0bc3
...
...
@@ -762,6 +762,13 @@ class NixlKVReceiver(CommonKVReceiver):
aux_index
:
Optional
[
int
]
=
None
,
state_indices
:
Optional
[
List
[
int
]]
=
None
,
):
if
self
.
bootstrap_infos
is
None
:
logger
.
error
(
f
"Could not fetch prefill parallel info from bootstrap_addr:
{
self
.
bootstrap_addr
}
"
,
)
self
.
kv_mgr
.
update_status
(
self
.
bootstrap_room
,
KVPoll
.
Failed
)
return
for
bootstrap_info
in
self
.
bootstrap_infos
:
logger
.
debug
(
f
"Fetched bootstrap info:
{
bootstrap_info
}
for engine rank:
{
self
.
kv_mgr
.
kv_args
.
engine_rank
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment