Unverified commit f3a009da, authored by Przemyslaw Tredak, committed by GitHub

Revert "Use internal quantizer for input to the modules" (#1555)

Revert "Use internal quantizer for input to the modules (#1551)"

This reverts commit b3e70353.
Signed-off-by: Przemek Tredak <ptredak@nvidia.com>
parent 314ab9a8
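
Each hunk below makes the same change: the module's FP8 input quantizer is switched back to internal = False, while the weight quantizer keeps internal = True. Here is a minimal sketch of that pattern, assuming a toy Quantizer class; the actual semantics of the internal flag in TransformerEngine are not spelled out in this commit and are an assumption made only for illustration.

# Minimal sketch of the reverted pattern (illustrative only; the Quantizer
# class and the meaning of `internal` are assumptions, not TransformerEngine's
# actual implementation).
class Quantizer:
    def __init__(self) -> None:
        # Presumed default: the quantized tensor is usable outside the module.
        self.internal = False

# Hypothetical quantizer table, keyed like self.quantizers in the hunks below.
quantizers = {
    "scaling_fwd": {
        "GEMM1_INPUT": Quantizer(),
        "GEMM1_WEIGHT": Quantizer(),
    }
}

input_quantizer = quantizers["scaling_fwd"]["GEMM1_INPUT"]
input_quantizer.internal = False  # reverted: #1551 had set this to True
weight_quantizer = quantizers["scaling_fwd"]["GEMM1_WEIGHT"]
weight_quantizer.internal = True  # weight quantizers keep the internal path

Note that the revert touches only the input quantizers: GEMM1_INPUT in Linear and LayerNormLinear, and the FC1 input in LayerNormMLP. The weight quantizers keep internal = True in all three hunks.
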
@@ -1358,7 +1358,7 @@ class LayerNormLinear(TransformerEngineBaseModule):
 grad_output_quantizer = None
 output_quantizer = None
 input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-input_quantizer.internal = True
+input_quantizer.internal = False
 weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 weight_quantizer.internal = True
 if fp8_output:
@@ -1528,7 +1528,7 @@ class LayerNormMLP(TransformerEngineBaseModule):
 ) = [None] * 8
 if self.fp8:
 fc1_input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-fc1_input_quantizer.internal = True
+fc1_input_quantizer.internal = False # temporary
 fc1_weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 fc1_weight_quantizer.internal = True
 fc2_input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM2_INPUT]
@@ -1136,7 +1136,7 @@ class Linear(TransformerEngineBaseModule):
 grad_output_quantizer = None
 output_quantizer = None
 input_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_INPUT]
-input_quantizer.internal = True
+input_quantizer.internal = False
 weight_quantizer = self.quantizers["scaling_fwd"][tex.FP8FwdTensors.GEMM1_WEIGHT]
 weight_quantizer.internal = True
 if fp8_output: