Unverified commit c2ff33cc, authored by Nick Hill and committed by GitHub
Browse files

[Core] Enable async scheduling by default (#27614)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent b12cb383
......@@ -130,11 +130,12 @@ class SchedulerConfig:
and starting configuration.
"""
async_scheduling: bool = False
"""If set to True, perform async scheduling. This helps to avoid gaps in
GPU utilization, leading to better latency and throughput.
Async scheduling is currently not supported with some features such as
speculative decoding and pipeline parallelism.
async_scheduling: bool = Field(default=None)
"""If set to False, disable async scheduling. Async scheduling helps to
avoid gaps in GPU utilization, leading to better latency and throughput.
It is currently not supported with some features such as
speculative decoding and pipeline parallelism, and will be automatically
disabled in those cases.
"""
stream_interval: int = Field(default=1, ge=1)
......
......@@ -552,7 +552,7 @@ class VllmConfig:
if self.speculative_config.method not in get_args(EagleModelTypes):
raise ValueError(
"Currently, async scheduling is only supported "
"with EAGLE/MTP kind of speculative decoding"
"with EAGLE/MTP kind of speculative decoding."
)
if self.speculative_config.disable_padded_drafter_batch:
raise ValueError(
......@@ -570,16 +570,27 @@ class VllmConfig:
)
elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option.
# NOTE: we won't reach here until async scheduling is enabled by default.
if (
self.parallel_config.pipeline_parallel_size > 1
or self.speculative_config is not None
):
if self.parallel_config.pipeline_parallel_size > 1:
logger.warning(
"Async scheduling is not yet supported with speculative decoding "
" or pipeline_parallel_size > 1 and will be disabled."
"Async scheduling is not yet supported with "
"pipeline_parallel_size > 1 and will be disabled."
)
self.scheduler_config.async_scheduling = False
elif self.speculative_config is not None:
if self.speculative_config.method not in get_args(EagleModelTypes):
logger.warning(
"Async scheduling not supported with %s-based "
"speculative decoding and will be disabled.",
self.speculative_config.method,
)
else:
logger.warning(
"Async scheduling will be disabled because some features do "
"not currently work in conjunction with speculative decoding. "
"To use async scheduling with spec decoding anyway, "
"enable it explicitly via async_scheduling=True."
)
self.scheduler_config.async_scheduling = False
elif not executor_supports_async_sched:
logger.warning(
"Async scheduling will be disabled because it is not supported "
......@@ -595,11 +606,16 @@ class VllmConfig:
self.scheduler_config.async_scheduling
and not self.parallel_config.disable_nccl_for_dp_synchronization
):
logger.info(
logger.info_once(
"Disabling NCCL for DP synchronization when using async scheduling."
)
self.parallel_config.disable_nccl_for_dp_synchronization = True
logger.info_once(
"Asynchronous scheduling is %s.",
"enabled" if self.scheduler_config.async_scheduling else "disabled",
)
from vllm.platforms import current_platform
if (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment