Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c2ff33cc
Unverified
Commit
c2ff33cc
authored
Dec 29, 2025
by
Nick Hill
Committed by
GitHub
Dec 29, 2025
Browse files
[Core] Enable async scheduling by default (#27614)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
b12cb383
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
14 deletions
+31
-14
vllm/config/scheduler.py
vllm/config/scheduler.py
+6
-5
vllm/config/vllm.py
vllm/config/vllm.py
+25
-9
No files found.
vllm/config/scheduler.py
View file @
c2ff33cc
...
@@ -130,11 +130,12 @@ class SchedulerConfig:
...
@@ -130,11 +130,12 @@ class SchedulerConfig:
and starting configuration.
and starting configuration.
"""
"""
async_scheduling
:
bool
=
False
async_scheduling
:
bool
=
Field
(
default
=
None
)
"""If set to True, perform async scheduling. This helps to avoid gaps in
"""If set to False, disable async scheduling. Async scheduling helps to
GPU utilization, leading to better latency and throughput.
avoid gaps in GPU utilization, leading to better latency and throughput.
Async scheduling is currently not supported with some features such as
It is currently not supported with some features such as
speculative decoding and pipeline parallelism.
speculative decoding and pipeline parallelism, and will be automatically
disabled in those cases.
"""
"""
stream_interval
:
int
=
Field
(
default
=
1
,
ge
=
1
)
stream_interval
:
int
=
Field
(
default
=
1
,
ge
=
1
)
...
...
vllm/config/vllm.py
View file @
c2ff33cc
...
@@ -552,7 +552,7 @@ class VllmConfig:
...
@@ -552,7 +552,7 @@ class VllmConfig:
if
self
.
speculative_config
.
method
not
in
get_args
(
EagleModelTypes
):
if
self
.
speculative_config
.
method
not
in
get_args
(
EagleModelTypes
):
raise
ValueError
(
raise
ValueError
(
"Currently, async scheduling is only supported "
"Currently, async scheduling is only supported "
"with EAGLE/MTP kind of speculative decoding"
"with EAGLE/MTP kind of speculative decoding
.
"
)
)
if
self
.
speculative_config
.
disable_padded_drafter_batch
:
if
self
.
speculative_config
.
disable_padded_drafter_batch
:
raise
ValueError
(
raise
ValueError
(
...
@@ -570,14 +570,25 @@ class VllmConfig:
...
@@ -570,14 +570,25 @@ class VllmConfig:
)
)
elif
self
.
scheduler_config
.
async_scheduling
is
None
:
elif
self
.
scheduler_config
.
async_scheduling
is
None
:
# Enable async scheduling unless there is an incompatible option.
# Enable async scheduling unless there is an incompatible option.
# NOTE: we won't reach here until async scheduling is enabled by default.
if
self
.
parallel_config
.
pipeline_parallel_size
>
1
:
if
(
self
.
parallel_config
.
pipeline_parallel_size
>
1
or
self
.
speculative_config
is
not
None
):
logger
.
warning
(
logger
.
warning
(
"Async scheduling is not yet supported with speculative decoding "
"Async scheduling is not yet supported with "
" or pipeline_parallel_size > 1 and will be disabled."
"pipeline_parallel_size > 1 and will be disabled."
)
self
.
scheduler_config
.
async_scheduling
=
False
elif
self
.
speculative_config
is
not
None
:
if
self
.
speculative_config
.
method
not
in
get_args
(
EagleModelTypes
):
logger
.
warning
(
"Async scheduling not supported with %s-based "
"speculative decoding and will be disabled."
,
self
.
speculative_config
.
method
,
)
else
:
logger
.
warning
(
"Async scheduling will be disabled because some features do "
"not currently work in conjunction with speculative decoding. "
"To use async scheduling with spec decoding anyway, "
"enable it explicitly via async_scheduling=True."
)
)
self
.
scheduler_config
.
async_scheduling
=
False
self
.
scheduler_config
.
async_scheduling
=
False
elif
not
executor_supports_async_sched
:
elif
not
executor_supports_async_sched
:
...
@@ -595,11 +606,16 @@ class VllmConfig:
...
@@ -595,11 +606,16 @@ class VllmConfig:
self
.
scheduler_config
.
async_scheduling
self
.
scheduler_config
.
async_scheduling
and
not
self
.
parallel_config
.
disable_nccl_for_dp_synchronization
and
not
self
.
parallel_config
.
disable_nccl_for_dp_synchronization
):
):
logger
.
info
(
logger
.
info
_once
(
"Disabling NCCL for DP synchronization when using async scheduling."
"Disabling NCCL for DP synchronization when using async scheduling."
)
)
self
.
parallel_config
.
disable_nccl_for_dp_synchronization
=
True
self
.
parallel_config
.
disable_nccl_for_dp_synchronization
=
True
logger
.
info_once
(
"Asynchronous scheduling is %s."
,
"enabled"
if
self
.
scheduler_config
.
async_scheduling
else
"disabled"
,
)
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
if
(
if
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment