[Pooling] Disable async scheduling by default for pooling models (#39592)

Signed-off-by: Nick Hill <nickhill123@gmail.com>

[Pooling] Disable async scheduling by default for pooling models (#39592)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
ee3c0c83 · Nick Hill · GitHub · cc07dad7 · ee3c0c83
Unverified Commit ee3c0c83 authored Apr 12, 2026 by Nick Hill Committed by GitHub Apr 12, 2026
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 0 deletions

vllm/config/vllm.py vllm/config/vllm.py +10 -0

No files found.
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -764,6 +764,16 @@ class VllmConfig:
        elif self.scheduler_config.async_scheduling is None:
            # Enable async scheduling unless there is an incompatible option.
            if (
+                self.model_config is not None
+                and self.model_config.runner_type == "pooling"
+            ):
+                # The current implementation of asynchronous scheduling negatively
+                # impacts performance of pooling models, so we disable it by default.
+                logger.debug(
+                    "Disabling asynchronous scheduling by default for pooling model."
+                )
+                self.scheduler_config.async_scheduling = False
+            elif (
                self.speculative_config is not None
                and self.speculative_config.method not in get_args(EagleModelTypes)
                and self.speculative_config.method not in get_args(NgramGPUTypes)