Unverified commit f3a009da, authored by Przemyslaw Tredak, committed by GitHub

Revert "Use internal quantizer for input to the modules" (#1555)

Revert "Use internal quantizer for input to the modules (#1551)"

This reverts commit b3e70353.
Signed-off-by: Przemek Tredak <ptredak@nvidia.com>
parent 314ab9a8
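
Each hunk below makes the same change: the module's FP8 input quantizer is switched back to internal = False, while the weight quantizer keeps internal = True. Here is a minimal sketch of that pattern, assuming a toy Quantizer class; the actual semantics of the internal flag in TransformerEngine are not spelled out in this commit and are an assumption made only for illustration.

# Minimal sketch of the reverted pattern (illustrative only; the Quantizer
# class and the meaning of `internal` are assumptions, not TransformerEngine's
# actual implementation).
class Quantizer:
    def __init__(self) -> None:
        # Presumed default: the quantized tensor is usable outside the module.
        self.internal = False

# Hypothetical quantizer table, keyed like self.quantizers in the hunks below.
quantizers = {
    "scaling_fwd": {
        "GEMM1_INPUT": Quantizer(),
        "GEMM1_WEIGHT": Quantizer(),
    }
}

input_quantizer = quantizers["scaling_fwd"]["GEMM1_INPUT"]
input_quantizer.internal = False  # reverted: #1551 had set this to True
weight_quantizer = quantizers["scaling_fwd"]["GEMM1_WEIGHT"]
weight_quantizer.internal = True  # weight quantizers keep the internal path

Note that the revert touches only the input quantizers: GEMM1_INPUT in Linear and LayerNormLinear, and the FC1 input in LayerNormMLP. The weight quantizers keep internal = True in all three hunks.
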
@@ -1358,7 +1358,7 @@ class LayerNormLinear(TransformerEngineBaseModule):
 grad_output_quantizer = None
 output_quantizer = None
 input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-input_quantizer.internal = True
+input_quantizer.internal = False
 weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 weight_quantizer.internal = True
 if fp8_output:
@@ -1528,7 +1528,7 @@ class LayerNormMLP(TransformerEngineBaseModule):
 ) = [None] * 8
 if self.fp8:
 fc1_input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-fc1_input_quantizer.internal = True
+fc1_input_quantizer.internal = False # temporary
 fc1_weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 fc1_weight_quantizer.internal = True
 fc2_input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM2_INPUT]
@@ -1136,7 +1136,7 @@ class Linear(TransformerEngineBaseModule):
 grad_output_quantizer = None
 output_quantizer = None
 input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-input_quantizer.internal = True
+input_quantizer.internal = False
 weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 weight_quantizer.internal = True
 if fp8_output: