[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>

[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)
Signed-off-by: zhxchen17 <zhxchen17@fb.com>
0abc7948 · Zhengxu Chen · GitHub · 4e57c658 · 0abc7948 · 0abc7948
Unverified commit 0abc7948, authored Nov 25, 2025 by Zhengxu Chen; committed by GitHub on Nov 25, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 1 addition and 4 deletions

vllm/config/cache.py +1 -3

vllm/config/model.py +0 -1

No files found.
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -144,7 +144,7 @@ class CacheConfig:

    kv_offloading_backend: KVOffloadingBackend | None = None
    """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used 
+    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
    together with kv_offloading_size."""

    def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ class CacheConfig:
            "num_gpu_blocks_override",
            "enable_prefix_caching",
            "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
            "cpu_kvcache_space_bytes",
            "mamba_page_size_padded",
            # Post-init/derived counters

--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -345,7 +345,6 @@ class ModelConfig:
            "logprobs_mode",
            "disable_cascade_attn",
            "skip_tokenizer_init",
-            "enable_prompt_embeds",
            "served_model_name",
            "config_format",
            "hf_token",