Unverified commit c2ff33cc, authored by Nick Hill, committed by GitHub
Browse files

[Core] Enable async scheduling by default (#27614)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent b12cb383
...@@ -130,11 +130,12 @@ class SchedulerConfig: ...@@ -130,11 +130,12 @@ class SchedulerConfig:
and starting configuration. and starting configuration.
""" """
async_scheduling: bool = False async_scheduling: bool = Field(default=None)
"""If set to True, perform async scheduling. This helps to avoid gaps in """If set to False, disable async scheduling. Async scheduling helps to
GPU utilization, leading to better latency and throughput. avoid gaps in GPU utilization, leading to better latency and throughput.
Async scheduling is currently not supported with some features such as It is currently not supported with some features such as
speculative decoding and pipeline parallelism. speculative decoding and pipeline parallelism, and will be automatically
disabled in those cases.
""" """
stream_interval: int = Field(default=1, ge=1) stream_interval: int = Field(default=1, ge=1)
......
...@@ -552,7 +552,7 @@ class VllmConfig: ...@@ -552,7 +552,7 @@ class VllmConfig:
if self.speculative_config.method not in get_args(EagleModelTypes): if self.speculative_config.method not in get_args(EagleModelTypes):
raise ValueError( raise ValueError(
"Currently, async scheduling is only supported " "Currently, async scheduling is only supported "
"with EAGLE/MTP kind of speculative decoding" "with EAGLE/MTP kind of speculative decoding."
) )
if self.speculative_config.disable_padded_drafter_batch: if self.speculative_config.disable_padded_drafter_batch:
raise ValueError( raise ValueError(
...@@ -570,16 +570,27 @@ class VllmConfig: ...@@ -570,16 +570,27 @@ class VllmConfig:
) )
elif self.scheduler_config.async_scheduling is None: elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option. # Enable async scheduling unless there is an incompatible option.
# NOTE: we won't reach here until async scheduling is enabled by default. if self.parallel_config.pipeline_parallel_size > 1:
if (
self.parallel_config.pipeline_parallel_size > 1
or self.speculative_config is not None
):
logger.warning( logger.warning(
"Async scheduling is not yet supported with speculative decoding " "Async scheduling is not yet supported with "
" or pipeline_parallel_size > 1 and will be disabled." "pipeline_parallel_size > 1 and will be disabled."
) )
self.scheduler_config.async_scheduling = False self.scheduler_config.async_scheduling = False
elif self.speculative_config is not None:
if self.speculative_config.method not in get_args(EagleModelTypes):
logger.warning(
"Async scheduling not supported with %s-based "
"speculative decoding and will be disabled.",
self.speculative_config.method,
)
else:
logger.warning(
"Async scheduling will be disabled because some features do "
"not currently work in conjunction with speculative decoding. "
"To use async scheduling with spec decoding anyway, "
"enable it explicitly via async_scheduling=True."
)
self.scheduler_config.async_scheduling = False
elif not executor_supports_async_sched: elif not executor_supports_async_sched:
logger.warning( logger.warning(
"Async scheduling will be disabled because it is not supported " "Async scheduling will be disabled because it is not supported "
...@@ -595,11 +606,16 @@ class VllmConfig: ...@@ -595,11 +606,16 @@ class VllmConfig:
self.scheduler_config.async_scheduling self.scheduler_config.async_scheduling
and not self.parallel_config.disable_nccl_for_dp_synchronization and not self.parallel_config.disable_nccl_for_dp_synchronization
): ):
logger.info( logger.info_once(
"Disabling NCCL for DP synchronization when using async scheduling." "Disabling NCCL for DP synchronization when using async scheduling."
) )
self.parallel_config.disable_nccl_for_dp_synchronization = True self.parallel_config.disable_nccl_for_dp_synchronization = True
logger.info_once(
"Asynchronous scheduling is %s.",
"enabled" if self.scheduler_config.async_scheduling else "disabled",
)
from vllm.platforms import current_platform from vllm.platforms import current_platform
if ( if (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment