[V1] TPU - Enable prefix caching by default (#14773)

7888e1d0 · Alexander Matveev · GitHub · 60c872d4 · 7888e1d0
Unverified commit 7888e1d0, authored Mar 13, 2025 by Alexander Matveev; committed by GitHub on Mar 13, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 0 additions and 6 deletions

vllm/platforms/tpu.py vllm/platforms/tpu.py +0 -6

No files found.
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -108,12 +108,6 @@ class TpuPlatform(Platform):
                    parallel_config.worker_cls = \
                        "vllm.worker.tpu_worker.TPUWorker"
-        # Adjust scheduler config for V1
-        # TODO: Add support for these
-        if envs.VLLM_USE_V1 and vllm_config.cache_config.enable_prefix_caching:
-            logger.warning("[V1][TPU] Disable prefix caching")
-            vllm_config.cache_config.enable_prefix_caching = False
        assert not vllm_config.speculative_config, (
            "Speculative decoding is not yet supported for TPU backend")