Commit ec51831a (unverified), authored by Harry Huang, committed by GitHub
Browse files

[BugFix] Disable async scheduling for Mamba prefix caching (#33352)


Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com>
parent 80b918f2
......@@ -619,6 +619,11 @@ class VllmConfig:
"`external_launcher` distributed executor backend, but you chose "
f"`{executor_backend}`."
)
if self.cache_config.mamba_cache_mode != "none":
raise ValueError(
"Currently, async scheduling is not compatible with "
"prefix caching for Mamba models."
)
elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option.
if (
......@@ -651,6 +656,13 @@ class VllmConfig:
scope="local",
)
self.scheduler_config.async_scheduling = False
elif self.cache_config.mamba_cache_mode != "none":
logger.warning_once(
"Async scheduling is not compatible with "
"prefix caching for Mamba models and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False
else:
self.scheduler_config.async_scheduling = True
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment