Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8d7da92f
Unverified
Commit
8d7da92f
authored
Oct 01, 2025
by
Lucas Wilkinson
Committed by
GitHub
Sep 30, 2025
Browse files
[BugFix] Fix default kv-cache-dtype default for DeepseekV3.2 (#25988)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
e952eee6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
14 deletions
+14
-14
vllm/model_executor/models/config.py
vllm/model_executor/models/config.py
+14
-14
No files found.
vllm/model_executor/models/config.py
View file @
8d7da92f
...
...
@@ -400,7 +400,7 @@ class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
"exactly equal."
,
mamba_padding_pct
)
class
DeepseekV3ForCausalLM
(
VerifyAndUpdateConfig
):
class
DeepseekV3
2
ForCausalLM
(
VerifyAndUpdateConfig
):
@
classmethod
def
verify_and_update_config
(
cls
,
vllm_config
:
"VllmConfig"
)
->
None
:
...
...
@@ -409,17 +409,17 @@ class DeepseekV3ForCausalLM(VerifyAndUpdateConfig):
"""
hf_config
=
vllm_config
.
model_config
.
hf_config
# Mirror the check in vllm/model_executor/models/deepseek_v2.py
is_v32
=
hasattr
(
hf_config
,
"index_topk"
)
assert
is_v32
if
is_v32
:
# For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
# "auto")
cache_config
=
vllm_config
.
cache_config
if
cache_config
.
cache_dtype
==
"auto"
or
\
cache_config
.
cache_dtype
.
startswith
(
"fp8"
):
cache_config
.
cache_dtype
=
"fp8_ds_mla"
logger
.
info
(
"Using custom fp8 kv-cache format for DeepSeekV3.2"
)
logger
.
info
(
"Using custom fp8 kv-cache format for DeepSeekV3.2"
)
if
cache_config
.
cache_dtype
==
"bfloat16"
:
cache_config
.
cache_dtype
=
"auto"
logger
.
info
(
"Using bfloat16 kv-cache for DeepSeekV3.2"
)
...
...
@@ -441,5 +441,5 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"MambaForCausalLM"
:
MambaModelConfig
,
"Mamba2ForCausalLM"
:
MambaModelConfig
,
"FalconMambaForCausalLM"
:
MambaModelConfig
,
"DeepseekV3ForCausalLM"
:
DeepseekV3ForCausalLM
,
"DeepseekV3
2
ForCausalLM"
:
DeepseekV3
2
ForCausalLM
,
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment