Unverified commit 4f605a6d, authored by Michael Goin, committed by GitHub
Browse files

Fix noisy warning for uncalibrated q_scale/p_scale (#17414)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 8342e3ab
...@@ -124,11 +124,12 @@ class BaseKVCacheMethod(QuantizeMethodBase): ...@@ -124,11 +124,12 @@ class BaseKVCacheMethod(QuantizeMethodBase):
# These are used in the final Attention.forward() # These are used in the final Attention.forward()
layer._q_scale.copy_(q_scale) layer._q_scale.copy_(q_scale)
layer._prob_scale.copy_(prob_scale) layer._prob_scale.copy_(prob_scale)
if q_scale == 1.0 or prob_scale == 1.0: if layer.kv_cache_dtype == "fp8" and (q_scale == 1.0
or prob_scale == 1.0):
logger.warning_once( logger.warning_once(
f"Using Q scale {q_scale} and prob scale {prob_scale} " f"Using uncalibrated q_scale {q_scale} and/or prob_scale "
"with fp8 attention. This may cause accuracy issues. " f"{prob_scale} with fp8 attention. This may cause accuracy "
"Please make sure Q/prob scaling factors are " "issues. Please make sure q/prob scaling factors are "
"available in the fp8 checkpoint.") "available in the fp8 checkpoint.")
del layer.k_scale del layer.k_scale
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment