Unverified commit 3adffd5b, authored by Nick Hill and committed by GitHub
Browse files

[Misc] Enable async scheduling by default with spec decoding (#31998)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent 97ba96fb
...@@ -582,11 +582,8 @@ class VllmConfig: ...@@ -582,11 +582,8 @@ class VllmConfig:
) )
if self.speculative_config.disable_padded_drafter_batch: if self.speculative_config.disable_padded_drafter_batch:
raise ValueError( raise ValueError(
"async scheduling for EAGLE/MTP kind of speculative " "Async scheduling is not compatible with "
"decoding is enabled, but disable_padded_drafter_batch=True " "disable_padded_drafter_batch=True."
"disable_padded_drafter_batch=True is not supported for "
"this situation now. please set "
"disable_padded_drafter_batch=Fasle"
) )
if not executor_supports_async_sched: if not executor_supports_async_sched:
raise ValueError( raise ValueError(
...@@ -597,32 +594,40 @@ class VllmConfig: ...@@ -597,32 +594,40 @@ class VllmConfig:
elif self.scheduler_config.async_scheduling is None: elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option. # Enable async scheduling unless there is an incompatible option.
if self.parallel_config.pipeline_parallel_size > 1: if self.parallel_config.pipeline_parallel_size > 1:
logger.warning( logger.warning_once(
"Async scheduling is not yet supported with " "Async scheduling is not yet supported with "
"pipeline_parallel_size > 1 and will be disabled." "pipeline_parallel_size > 1 and will be disabled.",
scope="local",
) )
self.scheduler_config.async_scheduling = False self.scheduler_config.async_scheduling = False
elif self.speculative_config is not None: elif (
if self.speculative_config.method not in get_args(EagleModelTypes): self.speculative_config is not None
logger.warning( and self.speculative_config.method not in get_args(EagleModelTypes)
"Async scheduling not supported with %s-based " ):
"speculative decoding and will be disabled.", logger.warning_once(
self.speculative_config.method, "Async scheduling not supported with %s-based "
) "speculative decoding and will be disabled.",
else: self.speculative_config.method,
logger.warning( scope="local",
"Async scheduling will be disabled because some features do " )
"not currently work in conjunction with speculative decoding. " self.scheduler_config.async_scheduling = False
"To use async scheduling with spec decoding anyway, " elif (
"enable it explicitly via async_scheduling=True." self.speculative_config is not None
) and self.speculative_config.disable_padded_drafter_batch
):
logger.warning_once(
"Async scheduling is not compatible with "
"disable_padded_drafter_batch=True and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False self.scheduler_config.async_scheduling = False
elif not executor_supports_async_sched: elif not executor_supports_async_sched:
logger.warning( logger.warning_once(
"Async scheduling will be disabled because it is not supported " "Async scheduling will be disabled because it is not supported "
"with the `%s` distributed executor backend (only `mp`, `uni`, and " "with the `%s` distributed executor backend (only `mp`, `uni`, and "
"`external_launcher` are supported).", "`external_launcher` are supported).",
executor_backend, executor_backend,
scope="local",
) )
self.scheduler_config.async_scheduling = False self.scheduler_config.async_scheduling = False
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment