Disable the fp8_e5m2 kv-cache ValueError for fp8 checkpoints

2b90ecd3 · zhuwenwen · 90227352 · 2b90ecd3
Commit 2b90ecd3 authored Oct 29, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 3 deletions

vllm/attention/layer.py vllm/attention/layer.py +3 -3

No files found.
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -123,9 +123,9 @@ class Attention(nn.Module):
            assert isinstance(quant_method, BaseKVCacheMethod)
            # TODO (mgoin): kv cache dtype should be specified in the FP8
            # checkpoint config and become the "auto" behavior
-            if self.kv_cache_dtype == "fp8_e5m2":
-                raise ValueError("fp8_e5m2 kv-cache is not supported with "
-                                 "fp8 checkpoints.")
+            # if self.kv_cache_dtype == "fp8_e5m2":
+            #     raise ValueError("fp8_e5m2 kv-cache is not supported with "
+            #                      "fp8 checkpoints.")
            # If quantization is enabled, we make "k_scale" and "v_scale"
            # parameters so that it can be loaded from the model checkpoint.
            # The k/v_scale will then be converted back to native float32