Unverified commit 0abc7948, authored by Zhengxu Chen, committed by GitHub
Browse files

[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)


Signed-off-by: zhxchen17 <zhxchen17@fb.com>
parent 4e57c658
...@@ -144,7 +144,7 @@ class CacheConfig: ...@@ -144,7 +144,7 @@ class CacheConfig:
kv_offloading_backend: KVOffloadingBackend | None = None kv_offloading_backend: KVOffloadingBackend | None = None
"""The backend to use for KV cache offloading. Supported backends include """The backend to use for KV cache offloading. Supported backends include
'native' (vLLM native CPU offloading), 'lmcache' This option must be used 'native' (vLLM native CPU offloading), 'lmcache' This option must be used
together with kv_offloading_size.""" together with kv_offloading_size."""
def compute_hash(self) -> str: def compute_hash(self) -> str:
...@@ -167,8 +167,6 @@ class CacheConfig: ...@@ -167,8 +167,6 @@ class CacheConfig:
"num_gpu_blocks_override", "num_gpu_blocks_override",
"enable_prefix_caching", "enable_prefix_caching",
"prefix_caching_hash_algo", "prefix_caching_hash_algo",
# `cpu_offload_gb` does not use `torch.compile` yet.
"cpu_offload_gb",
"cpu_kvcache_space_bytes", "cpu_kvcache_space_bytes",
"mamba_page_size_padded", "mamba_page_size_padded",
# Post-init/derived counters # Post-init/derived counters
......
...@@ -345,7 +345,6 @@ class ModelConfig: ...@@ -345,7 +345,6 @@ class ModelConfig:
"logprobs_mode", "logprobs_mode",
"disable_cascade_attn", "disable_cascade_attn",
"skip_tokenizer_init", "skip_tokenizer_init",
"enable_prompt_embeds",
"served_model_name", "served_model_name",
"config_format", "config_format",
"hf_token", "hf_token",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment