Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
a16923ef
Unverified
Commit
a16923ef
authored
Aug 14, 2025
by
Francis
Committed by
GitHub
Aug 13, 2025
Browse files
[PD] optimize kv cache transfer directly using batch transfer (#9149)
Co-authored-by: Shangming Cai <csmthu@gmail.com>
parent
6337d905
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
17 deletions
+33
-17
python/sglang/srt/disaggregation/mooncake/conn.py
python/sglang/srt/disaggregation/mooncake/conn.py
+33
-17
No files found.
python/sglang/srt/disaggregation/mooncake/conn.py
View file @
a16923ef
...
...
@@ -356,33 +356,49 @@ class MooncakeKVManager(BaseKVManager):
]
assert
layers_params
is
not
None
# Worker function for processing a single layer
def
process_layer
(
src_ptr
:
int
,
dst_ptr
:
int
,
item_len
:
int
)
->
int
:
def set_transfer_blocks(
    src_ptr: int, dst_ptr: int, item_len: int
) -> List[Tuple[int, int, int]]:
    """Build the transfer descriptors for one source/destination buffer pair.

    Walks ``prefill_kv_blocks`` and ``dst_kv_blocks`` (both taken from the
    enclosing scope) in lockstep and emits one descriptor per pair of groups.

    Args:
        src_ptr: Base address of the source buffer.
        dst_ptr: Base address of the destination buffer.
        item_len: Size of a single indexed item; used as the stride when
            turning an index into an address offset.

    Returns:
        A list of ``(src_addr, dst_addr, length)`` tuples where each address
        is the base pointer plus the first index of the group times
        ``item_len``, and ``length`` is ``item_len`` times the number of
        indices in the prefill-side group.
    """
    # NOTE(review): only the first index of each group is used for addressing,
    # so each group is presumably a run of contiguous indices -- confirm
    # against the code that builds prefill_kv_blocks / dst_kv_blocks.
    return [
        (
            src_ptr + int(src_group[0]) * item_len,
            dst_ptr + int(dst_group[0]) * item_len,
            item_len * len(src_group),
        )
        for src_group, dst_group in zip(prefill_kv_blocks, dst_kv_blocks)
    ]
# Worker function for processing a single layer
def process_layer(src_ptr: int, dst_ptr: int, item_len: int) -> int:
    """Transfer the blocks of a single source/destination buffer pair.

    Args:
        src_ptr: Base address of the source buffer.
        dst_ptr: Base address of the destination buffer.
        item_len: Size of a single indexed item in that buffer.

    Returns:
        The status returned by ``self._transfer_data`` for this buffer pair;
        the code that waits on these workers treats any non-zero value as a
        failure.
    """
    return self._transfer_data(
        mooncake_session_id,
        set_transfer_blocks(src_ptr, dst_ptr, item_len),
    )
futures
=
[
executor
.
submit
(
process_layer
,
src_ptr
,
dst_ptr
,
item_len
,
)
for
(
src_ptr
,
dst_ptr
,
item_len
)
in
layers_params
]
# Worker function for processing all layers in a batch
def process_layers(layers_params: List[Tuple[int, int, int]]) -> int:
    """Transfer the blocks of every buffer pair with one ``_transfer_data`` call.

    Args:
        layers_params: ``(src_ptr, dst_ptr, item_len)`` triples, one per
            buffer pair to transfer.

    Returns:
        The status returned by the single ``self._transfer_data`` call that
        receives the concatenated descriptors of all entries, in the order
        the entries appear in ``layers_params``.
    """
    # Flatten the per-entry descriptor lists into one list, preserving order.
    batched_blocks = [
        block
        for src_ptr, dst_ptr, item_len in layers_params
        for block in set_transfer_blocks(src_ptr, dst_ptr, item_len)
    ]
    return self._transfer_data(mooncake_session_id, batched_blocks)
for
future
in
concurrent
.
futures
.
as_completed
(
futures
):
status
=
future
.
result
()
if
status
!=
0
:
for
f
in
futures
:
f
.
cancel
()
return
status
if
self
.
enable_custom_mem_pool
:
futures
=
[
executor
.
submit
(
process_layer
,
src_ptr
,
dst_ptr
,
item_len
,
)
for
(
src_ptr
,
dst_ptr
,
item_len
)
in
layers_params
]
for
future
in
concurrent
.
futures
.
as_completed
(
futures
):
status
=
future
.
result
()
if
status
!=
0
:
for
f
in
futures
:
f
.
cancel
()
return
status
else
:
# Combining all layers' params in one batch transfer is more efficient
# compared to using multiple threads
return
process_layers
(
layers_params
)
return
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment