Unverified commit 7df02897, authored by Michael Goin, committed by GitHub
Browse files

Change warning logs to debug for unimplemented MXFP4 Linear/Attention (#29441)


Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 0abc7948
...@@ -196,9 +196,10 @@ class Mxfp4Config(QuantizationConfig): ...@@ -196,9 +196,10 @@ class Mxfp4Config(QuantizationConfig):
# TODO: Add support for MXFP4 Linear Method. # TODO: Add support for MXFP4 Linear Method.
# MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation # MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation
# if you are interested in enabling MXFP4 here. # if you are interested in enabling MXFP4 here.
logger.warning_once( logger.debug_once(
"MXFP4 linear layer is not implemented - falling back to " "MXFP4 linear layer is not implemented - falling back to "
"UnquantizedLinearMethod." "UnquantizedLinearMethod.",
scope="local",
) )
return UnquantizedLinearMethod() return UnquantizedLinearMethod()
elif isinstance(layer, FusedMoE): elif isinstance(layer, FusedMoE):
...@@ -208,9 +209,10 @@ class Mxfp4Config(QuantizationConfig): ...@@ -208,9 +209,10 @@ class Mxfp4Config(QuantizationConfig):
return Mxfp4MoEMethod(layer.moe_config) return Mxfp4MoEMethod(layer.moe_config)
elif isinstance(layer, Attention): elif isinstance(layer, Attention):
# TODO: Add support for MXFP4 Attention. # TODO: Add support for MXFP4 Attention.
logger.warning_once( logger.debug_once(
"MXFP4 attention layer is not implemented. " "MXFP4 attention layer is not implemented. "
"Skipping quantization for this layer." "Skipping quantization for this layer.",
scope="local",
) )
return None return None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment