Unverified commit c2bba690, authored by Lucas Wilkinson, committed by GitHub
Browse files

[BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (#27121)


Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent e133d6d2
...@@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig): ...@@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
is_v32 = hasattr(hf_config, "index_topk") is_v32 = hasattr(hf_config, "index_topk")
assert is_v32 assert is_v32
# For DeepSeekV3.2, we use a custom fp8 format as default (i.e. # For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled.
# "auto")
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith( if cache_config.cache_dtype.startswith("fp8"):
"fp8"
):
cache_config.cache_dtype = "fp8_ds_mla" cache_config.cache_dtype = "fp8_ds_mla"
logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2") logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
if cache_config.cache_dtype == "bfloat16": if cache_config.cache_dtype == "bfloat16":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment