[CI Failure] fix_test_auto_prefix_cache_support (#26053)

Signed-off-by: Huamin Li <3ericli@gmail.com>

[CI Failure] fix_test_auto_prefix_cache_support (#26053)
Signed-off-by: Huamin Li <3ericli@gmail.com>
7d6b0338 · Huamin Li · GitHub · 7c2e91c4 · 7d6b0338 · 7d6b0338
Unverified commit 7d6b0338, authored Oct 04, 2025 by Huamin Li; committed by GitHub on Oct 04, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 14 additions and 7 deletions

tests/v1/core/test_scheduler.py +2 -2

vllm/config/vllm.py +12 -5

No files found.
--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -1917,7 +1917,7 @@ def test_priority_scheduling_preemption_when_out_of_kv():
 def test_chunked_prefill_disabled_for_encoder_decoder(
        enable_chunked_prefill: bool, is_encoder_decoder: bool,
        expect_enabled: bool) -> None:
-    """Validate that chunked prefill is appropriately disabled for 
+    """Validate that chunked prefill is appropriately disabled for
    encoder-decoder models."""
    scheduler_config = SchedulerConfig(
        enable_chunked_prefill=enable_chunked_prefill,
@@ -1942,7 +1942,7 @@ def test_chunked_prefill_disabled_for_encoder_decoder(
 def _validate_chunked_prefill_settings_for_encoder_decoder(
        scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
        expect_enabled: bool) -> None:
-    """Validate chunked prefill settings in the scheduler config for 
+    """Validate chunked prefill settings in the scheduler config for
    encoder-decoder models."""
    assert scheduler_config.chunked_prefill_enabled is expect_enabled
    assert scheduler_config.enable_chunked_prefill is expect_enabled

--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -396,10 +396,17 @@ class VllmConfig:
                        "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
                        "to 'spawn'.")
-        # Disable prefix caching only if chunked prefill is explicitly disabled
-        # (and not merely unset)
-        if (self.scheduler_config.chunked_prefill_enabled is False
-                or disable_chunked_prefill_reasons):
+        # Final off-switch for CP/APC:
+        # Disable for (a) collected blockers, (b) encoder–decoder, or
+        # (c) explicit CP=False when APC wasn't requested.
+        # Do NOT disable merely because the resolved CP flag is False.
+        apc_requested = (self.cache_config is not None
+                         and self.cache_config.enable_prefix_caching)
+        if (disable_chunked_prefill_reasons
+                or (self.model_config is not None
+                    and self.model_config.is_encoder_decoder)
+                or (self.scheduler_config.enable_chunked_prefill is False
+                    and not apc_requested)):
            for reason in disable_chunked_prefill_reasons:
                logger.info(reason)
            self.scheduler_config.chunked_prefill_enabled = False
@@ -668,7 +675,7 @@ class VllmConfig:
                                 f"Model: {self.model_config.model}")
    def compile_debug_dump_path(self) -> Optional[Path]:
-        """Returns a rank-aware path for dumping 
+        """Returns a rank-aware path for dumping
        torch.compile debug information.
        """
        if self.compilation_config.debug_dump_path is None: