Unverified commit 7888e1d0, authored by Alexander Matveev, committed by GitHub
Browse files

[V1] TPU - Enable prefix caching by default (#14773)

parent 60c872d4
...@@ -108,12 +108,6 @@ class TpuPlatform(Platform): ...@@ -108,12 +108,6 @@ class TpuPlatform(Platform):
parallel_config.worker_cls = \ parallel_config.worker_cls = \
"vllm.worker.tpu_worker.TPUWorker" "vllm.worker.tpu_worker.TPUWorker"
# Adjust scheduler config for V1
# TODO: Add support for these
if envs.VLLM_USE_V1 and vllm_config.cache_config.enable_prefix_caching:
logger.warning("[V1][TPU] Disable prefix caching")
vllm_config.cache_config.enable_prefix_caching = False
assert not vllm_config.speculative_config, ( assert not vllm_config.speculative_config, (
"Speculative decoding is not yet supported for TPU backend") "Speculative decoding is not yet supported for TPU backend")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment