Unverified commit 7df02897, authored by Michael Goin and committed by GitHub
Browse files

Change warning logs to debug for unimplemented MXFP4 Linear/Attention (#29441)


Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 0abc7948
......@@ -196,9 +196,10 @@ class Mxfp4Config(QuantizationConfig):
# TODO: Add support for MXFP4 Linear Method.
# MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation
# if you are interested in enabling MXFP4 here.
logger.warning_once(
logger.debug_once(
"MXFP4 linear layer is not implemented - falling back to "
"UnquantizedLinearMethod."
"UnquantizedLinearMethod.",
scope="local",
)
return UnquantizedLinearMethod()
elif isinstance(layer, FusedMoE):
......@@ -208,9 +209,10 @@ class Mxfp4Config(QuantizationConfig):
return Mxfp4MoEMethod(layer.moe_config)
elif isinstance(layer, Attention):
# TODO: Add support for MXFP4 Attention.
logger.warning_once(
logger.debug_once(
"MXFP4 attention layer is not implemented. "
"Skipping quantization for this layer."
"Skipping quantization for this layer.",
scope="local",
)
return None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment