Fix noisy warning for uncalibrated q_scale/p_scale (#17414)

Signed-off-by: mgoin <mgoin64@gmail.com>

Fix noisy warning for uncalibrated q_scale/p_scale (#17414)
Signed-off-by: mgoin <mgoin64@gmail.com>
4f605a6d · Michael Goin · GitHub · 8342e3ab · 4f605a6d
Unverified commit 4f605a6d, authored May 08, 2025 by Michael Goin; committed by GitHub on May 08, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 4 deletions

vllm/model_executor/layers/quantization/kv_cache.py -> vllm/model_executor/layers/quantization/kv_cache.py (+5 -4)

No files found.
--- a/vllm/model_executor/layers/quantization/kv_cache.py
+++ b/vllm/model_executor/layers/quantization/kv_cache.py
@@ -124,11 +124,12 @@ class BaseKVCacheMethod(QuantizeMethodBase):
        # These are used in the final Attention.forward()
        layer._q_scale.copy_(q_scale)
        layer._prob_scale.copy_(prob_scale)
-        if q_scale == 1.0 or prob_scale == 1.0:
+        if layer.kv_cache_dtype == "fp8" and (q_scale == 1.0
+                                              or prob_scale == 1.0):
            logger.warning_once(
-                f"Using Q scale {q_scale} and prob scale {prob_scale} "
+                f"Using uncalibrated q_scale {q_scale} and/or prob_scale "
-                "with fp8 attention. This may cause accuracy issues. "
+                f"{prob_scale} with fp8 attention. This may cause accuracy "
-                "Please make sure Q/prob scaling factors are "
+                "issues. Please make sure q/prob scaling factors are "
                "available in the fp8 checkpoint.")
        del layer.k_scale