Commit ee3c0c83 (unverified), authored by Nick Hill, committed by GitHub
Browse files

[Pooling] Disable async scheduling by default for pooling models (#39592)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent cc07dad7
...@@ -764,6 +764,16 @@ class VllmConfig: ...@@ -764,6 +764,16 @@ class VllmConfig:
elif self.scheduler_config.async_scheduling is None: elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option. # Enable async scheduling unless there is an incompatible option.
if ( if (
self.model_config is not None
and self.model_config.runner_type == "pooling"
):
# The current implementation of asynchronous scheduling negatively
# impacts performance of pooling models, so we disable by default.
logger.debug(
"Disabling asynchronous scheduling by default for pooling model."
)
self.scheduler_config.async_scheduling = False
elif (
self.speculative_config is not None self.speculative_config is not None
and self.speculative_config.method not in get_args(EagleModelTypes) and self.speculative_config.method not in get_args(EagleModelTypes)
and self.speculative_config.method not in get_args(NgramGPUTypes) and self.speculative_config.method not in get_args(NgramGPUTypes)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment