Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
73b13e69
"mmdet3d/vscode:/vscode.git/clone" did not exist on "f27d308fbea86fe36a7189923be74d78b767909f"
Unverified
Commit
73b13e69
authored
Jun 20, 2025
by
Cheng Wan
Committed by
GitHub
Jun 20, 2025
Browse files
Optimize DP attn scheduling for speculative decoding (#7285)
parent
8609e637
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
32 deletions
+12
-32
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+12
-32
No files found.
python/sglang/srt/managers/scheduler.py
View file @
73b13e69
...
...
@@ -1399,29 +1399,6 @@ class Scheduler(
self
.
metrics_collector
.
log_stats
(
self
.
stats
)
self
.
_publish_kv_events
()
def coordinate_spec_dp_attn_batch(self, new_batch: Optional[ScheduleBatch]):
    """Coordinate the DP attention batch.

    Every rank contributes a single int64 flag (1 if it has a new forward
    batch, 0 otherwise); the flags are all-gathered across the TP CPU group
    and reshaped per DP worker / attn-TP rank.

    Returns:
        True if at least one DP worker in the group has a forward batch.
    """
    # Encode "this rank has a batch" as a one-element int64 tensor.
    has_batch_flag = torch.tensor([new_batch is not None], dtype=torch.int64)

    # One slot per (dp worker, attn-TP rank); filled by the collective below.
    gathered_flags = torch.empty(
        (self.server_args.dp_size, self.attn_tp_size, 1),
        dtype=torch.int64,
    )
    torch.distributed.all_gather_into_tensor(
        gathered_flags.flatten(),
        has_batch_flag,
        group=self.tp_cpu_group,
    )

    # Only attn-TP rank 0 of each DP worker is consulted; any set flag means
    # some DP worker has a forward batch.
    per_dp_flags = gathered_flags[:, 0, 0].tolist()
    return any(per_dp_flags)
def
get_next_batch_to_run
(
self
)
->
Optional
[
ScheduleBatch
]:
# Merge the prefill batch into the running batch
chunked_req_to_exclude
=
set
()
...
...
@@ -1456,13 +1433,15 @@ class Scheduler(
new_batch
=
self
.
get_new_batch_prefill
()
# TODO(ch-wan): minor refactor is needed here to improve readability
any_new_batch
=
(
self
.
server_args
.
enable_dp_attention
and
not
self
.
spec_algorithm
.
is_none
()
and
self
.
coordinate_spec_dp_attn_batch
(
new_batch
)
)
if
new_batch
is
not
None
or
any_new_batch
:
need_dp_attn_preparation
=
require_mlp_sync
(
self
.
server_args
)
if
need_dp_attn_preparation
and
not
self
.
spec_algorithm
.
is_none
():
# In speculative decoding, prefill batches and decode batches cannot be processed in the same DP attention group.
# We prepare idle batches in advance to skip preparing decode batches when there are prefill batches in the group.
new_batch
,
_
=
self
.
prepare_dp_attn_batch
(
new_batch
)
need_dp_attn_preparation
=
new_batch
is
None
if
new_batch
is
not
None
:
# Run prefill first if possible
ret
=
new_batch
else
:
...
...
@@ -1473,8 +1452,9 @@ class Scheduler(
else
:
ret
=
None
if
require_mlp_sync
(
self
.
server_args
):
ret
,
_
=
self
.
prepare_mlp_sync_batch
(
ret
)
# Handle DP attention
if
need_dp_attn_preparation
:
ret
,
_
=
self
.
prepare_dp_attn_batch
(
ret
)
return
ret
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment