Unverified Commit 0abc7948 authored by Zhengxu Chen's avatar Zhengxu Chen Committed by GitHub
Browse files

[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)


Signed-off-by: default avatarzhxchen17 <zhxchen17@fb.com>
parent 4e57c658
......@@ -144,7 +144,7 @@ class CacheConfig:
kv_offloading_backend: KVOffloadingBackend | None = None
"""The backend to use for KV cache offloading. Supported backends include
'native' (vLLM native CPU offloading), 'lmcache' This option must be used
'native' (vLLM native CPU offloading), 'lmcache' This option must be used
together with kv_offloading_size."""
def compute_hash(self) -> str:
......@@ -167,8 +167,6 @@ class CacheConfig:
"num_gpu_blocks_override",
"enable_prefix_caching",
"prefix_caching_hash_algo",
# `cpu_offload_gb` does not use `torch.compile` yet.
"cpu_offload_gb",
"cpu_kvcache_space_bytes",
"mamba_page_size_padded",
# Post-init/derived counters
......
......@@ -345,7 +345,6 @@ class ModelConfig:
"logprobs_mode",
"disable_cascade_attn",
"skip_tokenizer_init",
"enable_prompt_embeds",
"served_model_name",
"config_format",
"hf_token",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment