Disable the fp8_e5m2 kv-cache ValueError for fp8 checkpoints

653b799b · zhuwenwen · 1851782d · 653b799b
Commit 653b799b authored Oct 31, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

vllm/attention/layer.py vllm/attention/layer.py +3 -3

No files found.
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -170,9 +170,9 @@ class Attention(nn.Module, AttentionLayerBase):
            assert isinstance(quant_method, BaseKVCacheMethod)
            # TODO (mgoin): kv cache dtype should be specified in the FP8
            # checkpoint config and become the "auto" behavior
-            if self.kv_cache_dtype == "fp8_e5m2":
+            # if self.kv_cache_dtype == "fp8_e5m2":
-                raise ValueError("fp8_e5m2 kv-cache is not supported with "
+            #     raise ValueError("fp8_e5m2 kv-cache is not supported with "
-                                 "fp8 checkpoints.")
+            #                      "fp8 checkpoints.")
            # If quantization is enabled, we make "k_scale" and "v_scale"
            # parameters so that it can be loaded from the model checkpoint.
            # The k/v_scale will then be converted back to native float32