Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e2ed2388
Unverified
Commit
e2ed2388
authored
Dec 14, 2025
by
Robert Shaw
Committed by
GitHub
Dec 14, 2025
Browse files
Revert "[Fix]Load kv-cache dtype from hf_quant_config.json automatically" (#30653)
parent
174e39ea
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
23 deletions
+2
-23
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+2
-23
No files found.
vllm/utils/torch_utils.py
View file @
e2ed2388
...
@@ -194,33 +194,12 @@ def get_kv_cache_torch_dtype(
...
@@ -194,33 +194,12 @@ def get_kv_cache_torch_dtype(
return
torch_dtype
return
torch_dtype
def
get_kv_cache_quant_algo_dtype
(
quant_cfg
:
dict
[
str
,
Any
])
->
torch
.
dtype
|
None
:
quant_method
=
quant_cfg
.
get
(
"quant_method"
,
""
)
if
quant_method
.
startswith
(
"modelopt"
):
quantization_inner
=
quant_cfg
.
get
(
"quantization"
,
quant_cfg
)
# Check if quant config is specified and use kv cache quant algo
kv_algo
=
quantization_inner
.
get
(
"kv_cache_quant_algo"
)
or
quant_cfg
.
get
(
"kv_cache_quant_algo"
)
if
isinstance
(
kv_algo
,
str
):
return
STR_DTYPE_TO_TORCH_DTYPE
[
kv_algo
.
lower
()]
return
None
def
kv_cache_dtype_str_to_dtype
(
def
kv_cache_dtype_str_to_dtype
(
kv_cache_dtype
:
str
,
model_config
:
ModelConfig
kv_cache_dtype
:
str
,
model_config
:
ModelConfig
)
->
torch
.
dtype
:
)
->
torch
.
dtype
:
# Model config may not be specified for unit tests, default to float16
dtype
=
model_config
.
dtype
if
model_config
else
torch
.
half
if
kv_cache_dtype
==
"auto"
:
if
kv_cache_dtype
==
"auto"
:
hf_cfg
=
getattr
(
model_config
,
"hf_config"
,
None
)
# Model config may not be specified for unit tests, default to float16
if
hf_cfg
is
not
None
:
return
model_config
.
dtype
if
model_config
else
torch
.
half
quant_cfg
=
getattr
(
hf_cfg
,
"quantization_config"
,
None
)
if
quant_cfg
is
not
None
:
kv_algo_dtype
=
get_kv_cache_quant_algo_dtype
(
quant_cfg
)
return
kv_algo_dtype
if
kv_algo_dtype
is
not
None
else
dtype
return
dtype
return
STR_DTYPE_TO_TORCH_DTYPE
[
kv_cache_dtype
]
return
STR_DTYPE_TO_TORCH_DTYPE
[
kv_cache_dtype
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment