Update kv_cache_dtype support: no longer reject fp8_e5m2 KV cache with FP8 checkpoints

aaf8c95f · zhuwenwen · ef8dd155 · aaf8c95f
Commit aaf8c95f authored Mar 03, 2026 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

vllm/attention/layer.py vllm/attention/layer.py +2 -2

No files found.
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -130,8 +130,8 @@ def _init_kv_cache_quant(
        assert isinstance(quant_method, BaseKVCacheMethod)
        # TODO (mgoin): kv cache dtype should be specified in the FP8
        # checkpoint config and become the "auto" behavior
-        if layer.kv_cache_dtype == "fp8_e5m2":
-            raise ValueError("fp8_e5m2 kv-cache is not supported with fp8 checkpoints.")
+        # if layer.kv_cache_dtype == "fp8_e5m2":
+        #     raise ValueError("fp8_e5m2 kv-cache is not supported with fp8 checkpoints.")
        # If quantization is enabled, we make "k_scale" and "v_scale"
        # parameters so that it can be loaded from the model checkpoint.
        # The k/v_scale will then be converted back to native float32