Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0c961487
Unverified
Commit
0c961487
Authored Jan 10, 2026 by roikoren755
Committed by GitHub, Jan 10, 2026
Browse files
Update modelopt KV cache quantization resolution to new scheme (#31895)
Signed-off-by: Roi Koren <roik@nvidia.com>
parent
583a90e0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 21 additions and 2 deletions
+21
-2
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+21
-2
No files found.
vllm/utils/torch_utils.py
View file @
0c961487
...
@@ -219,9 +219,28 @@ def get_kv_cache_quant_algo_string(quant_cfg: dict[str, Any]) -> str | None:
     if quant_method.startswith("modelopt"):
         quantization_inner = quant_cfg.get("quantization", quant_cfg)
         # Check if quant config is specified and use kv cache quant algo
-        kv_algo = quantization_inner.get("kv_cache_quant_algo") or quant_cfg.get(
-            "kv_cache_quant_algo"
+        kv_algo = (
+            quantization_inner.get("kv_cache_scheme")
+            or quant_cfg.get("kv_cache_scheme")
+            or quantization_inner.get("kv_cache_quant_algo")
+            or quant_cfg.get("kv_cache_quant_algo")
         )
+        if isinstance(kv_algo, dict):
+            if (
+                kv_algo.get("dynamic") is False
+                and kv_algo.get("num_bits") == 8
+                and kv_algo.get("type") == "float"
+            ):
+                kv_algo = "fp8"
+            else:
+                # Unknown/unsupported format - return "auto" as safe fallback
+                logger.warning(
+                    "WARNING: Unknown kv_cache_quant_algo '%s' in model "
+                    "config. Supported values: %s. Falling back to 'auto'.",
+                    f"{kv_algo}",
+                    list(MODELOPT_TO_VLLM_KV_CACHE_DTYPE_MAP.keys()),
+                )
+                return "auto"
         if isinstance(kv_algo, str):
             kv_algo_lower = kv_algo.lower()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment