Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
774b47f3
Unverified
Commit
774b47f3
authored
Aug 08, 2025
by
fzyzcjy
Committed by
GitHub
Aug 08, 2025
Browse files
Reduce scheduler recv requests overhead (#8947)
parent
76915d68
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
54 additions
and
0 deletions
+54
-0
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+10
-0
python/sglang/srt/managers/scheduler_recv_skipper.py
python/sglang/srt/managers/scheduler_recv_skipper.py
+37
-0
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+7
-0
No files found.
python/sglang/srt/managers/scheduler.py
View file @
774b47f3
...
...
@@ -120,6 +120,7 @@ from sglang.srt.managers.scheduler_output_processor_mixin import (
SchedulerOutputProcessorMixin
,
)
from
sglang.srt.managers.scheduler_profiler_mixin
import
SchedulerProfilerMixin
from
sglang.srt.managers.scheduler_recv_skipper
import
SchedulerRecvSkipper
from
sglang.srt.managers.scheduler_update_weights_mixin
import
(
SchedulerUpdateWeightsMixin
,
)
...
...
@@ -474,6 +475,7 @@ class Scheduler(
)
self
.
init_profier
()
self
.
recv_skipper
=
SchedulerRecvSkipper
.
maybe_create
(
server_args
)
self
.
input_blocker
=
(
SchedulerInputBlocker
(
noop
=
self
.
attn_tp_rank
!=
0
)
if
get_bool_env_var
(
"SGLANG_ENABLE_COLOCATED_BATCH_GEN"
)
...
...
@@ -946,6 +948,14 @@ class Scheduler(
def
recv_requests
(
self
)
->
List
[
Req
]:
"""Receive results at tp_rank = 0 and broadcast it to all other TP ranks."""
if
self
.
recv_skipper
is
not
None
:
last_forward_mode
=
(
self
.
last_batch
.
forward_mode
if
self
.
last_batch
is
not
None
else
None
)
if
not
self
.
recv_skipper
.
handle
(
last_forward_mode
):
return
[]
if
self
.
pp_rank
==
0
:
if
self
.
attn_tp_rank
==
0
:
recv_reqs
=
[]
...
...
python/sglang/srt/managers/scheduler_recv_skipper.py
0 → 100644
View file @
774b47f3
from typing import Optional

from sglang.srt.model_executor.forward_batch_info import ForwardMode
from sglang.srt.server_args import ServerArgs
class SchedulerRecvSkipper:
    """Rate-limit how often the scheduler polls for incoming requests.

    Every call to :meth:`handle` adds a weight, looked up from the forward
    mode of the previous batch, to an internal counter. The call reports
    "receive now" only once the counter has reached
    ``server_args.scheduler_recv_interval``; the counter is then reset to 0.
    """

    @staticmethod
    def maybe_create(server_args: ServerArgs) -> Optional["SchedulerRecvSkipper"]:
        """Build a skipper, or return ``None`` when skipping is pointless.

        With ``scheduler_recv_interval <= 1`` every iteration would receive
        anyway, so no skipper object is created.
        """
        if server_args.scheduler_recv_interval <= 1:
            return None
        return SchedulerRecvSkipper(server_args)

    def __init__(self, server_args: ServerArgs) -> None:
        """Initialise the counter and read the threshold from ``server_args``.

        Raises:
            AssertionError: if ``server_args.enable_dp_attention`` is set.
        """
        # Can be supported if needed, but may need e.g. `global_forward_mode`
        assert (
            not server_args.enable_dp_attention
        ), "SchedulerRecvSkipper does not support enable_dp_attention"
        self._counter = 0
        self._threshold = server_args.scheduler_recv_interval

    def handle(self, last_forward_mode: Optional[ForwardMode]) -> bool:
        """Account for one scheduler iteration and say whether to receive.

        Args:
            last_forward_mode: Forward mode of the previous batch, or ``None``
                when there was no previous batch.

        Returns:
            ``True`` when the accumulated weight has reached the threshold
            (the counter is reset in that case), ``False`` otherwise.
        """
        # Modes missing from the table fall back to `_DEFAULT_WEIGHT`.
        self._counter += _WEIGHT_OF_FORWARD_MODE.get(
            last_forward_mode, _DEFAULT_WEIGHT
        )
        if self._counter < self._threshold:
            return False
        self._counter = 0
        return True
# Tunable weights consumed by `SchedulerRecvSkipper.handle`.

# Weight applied to any forward mode that has no entry in the table below.
_DEFAULT_WEIGHT = 1000

# Per-iteration weight added to the skipper's counter, keyed by the forward
# mode of the previous batch (`None` is the key used when there was none).
_WEIGHT_OF_FORWARD_MODE = {
    ForwardMode.DECODE: 1,
    ForwardMode.TARGET_VERIFY: 1,
    None: 1,
}
python/sglang/srt/server_args.py
View file @
774b47f3
...
...
@@ -249,6 +249,7 @@ class ServerArgs:
enable_return_hidden_states
:
bool
=
False
enable_triton_kernel_moe
:
bool
=
False
enable_flashinfer_mxfp4_moe
:
bool
=
False
scheduler_recv_interval
:
int
=
1
# Debug tensor dumps
debug_tensor_dump_output_folder
:
Optional
[
str
]
=
None
...
...
@@ -1845,6 +1846,12 @@ class ServerArgs:
action
=
"store_true"
,
help
=
"Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell."
,
)
parser
.
add_argument
(
"--scheduler-recv-interval"
,
type
=
int
,
default
=
ServerArgs
.
scheduler_recv_interval
,
help
=
"The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this."
,
)
# Debug tensor dumps
parser
.
add_argument
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment