Unverified commit 69636a08 authored by Victor Oliveira, committed by GitHub

ONNX: Fix FP8 quantization for the second MLP in LayerNormMLP (#2577)



Signed-off-by: Victor Oliveira <victor.oliveira@getcruise.com>
parent fe8fad59
@@ -2243,14 +2243,23 @@ class LayerNormMLP(TransformerEngineBaseModule):
         assert not TEDebugState.debug_enabled, "Debug mode is not supported in ONNX export"
         assert_warmed_up(self)
         # Get quantizers
         (
             fc1_input_quantizer,
             fc1_weight_quantizer,
+            _,
+            _,
+            _,
+            _,
             fc2_input_quantizer,
             fc2_weight_quantizer,
-            output_quantizer,
-            *_,
+            fc2_output_quantizer,
+            _,
+            _,
+            _,
         ) = self._get_quantizers(False, is_grad_enabled)
         inp_dtype = inp.dtype
         fc1_weight, fc2_weight = self._get_weight_tensors()
@@ -2324,7 +2333,7 @@ class LayerNormMLP(TransformerEngineBaseModule):
         fc2_out = onnx_gemm(fc2_weight, act_out, fc2_bias)
-        if output_quantizer is not None:
+        if fc2_output_quantizer is not None:
             raise NotImplementedError("ONNX export of quantized output is not supported")
         if self.return_layernorm_output:
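
The root cause is that LayerNormMLP._get_quantizers returns a single flat tuple of quantizers covering both GEMMs, and the old unpacking read FC2's quantizers from the wrong positions. Below is a minimal standalone sketch of the bug and the fix, assuming a hypothetical twelve-entry ordering (six quantizers per GEMM: input, weight, output, plus three gradient quantizers); the stub function and the string names are illustrative assumptions, not TransformerEngine's actual API.

    def _get_quantizers_stub():
        # ASSUMED ordering, for illustration only: six quantizers per GEMM
        # (input, weight, output, grad_input, grad_weight, grad_output).
        return [
            "fc1_input", "fc1_weight", "fc1_output",
            "fc1_grad_input", "fc1_grad_weight", "fc1_grad_output",
            "fc2_input", "fc2_weight", "fc2_output",
            "fc2_grad_input", "fc2_grad_weight", "fc2_grad_output",
        ]

    # Before the fix: positions 2 and 3 hold fc1's remaining quantizers,
    # so fc2 silently picks up the wrong ones.
    (
        fc1_input_quantizer,
        fc1_weight_quantizer,
        fc2_input_quantizer,   # actually "fc1_output" under this ordering
        fc2_weight_quantizer,  # actually "fc1_grad_input"
        output_quantizer,      # actually "fc1_grad_weight"
        *_,
    ) = _get_quantizers_stub()
    assert fc2_input_quantizer == "fc1_output"  # the bug: wrong quantizer selected

    # After the fix: every skipped slot is spelled out, so fc2's quantizers
    # come from the correct positions.
    (
        fc1_input_quantizer,
        fc1_weight_quantizer,
        _,
        _,
        _,
        _,
        fc2_input_quantizer,
        fc2_weight_quantizer,
        fc2_output_quantizer,
        _,
        _,
        _,
    ) = _get_quantizers_stub()
    assert fc2_input_quantizer == "fc2_input"  # fc2 now uses its own quantizer

A side benefit of replacing *_ with explicit underscores: the unpacking now pins the exact tuple length, so a future change to _get_quantizers fails loudly at this line instead of silently misaligning quantizers again.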