[JAX] Tighten Encoder Test tolerances (#1955)

tighten encoder test tols Signed-off-by: Phuong Nguyen <phuonguyen@nvidia.com>

[JAX] Tighten Encoder Test tolerances (#1955)
tighten encoder test tols Signed-off-by: Phuong Nguyen <phuonguyen@nvidia.com>
ed75c2b0 · Phuong Nguyen · GitHub · 07afda98 · ed75c2b0 · ed75c2b0
Unverified Commit ed75c2b0 authored Jul 17, 2025 by Phuong Nguyen Committed by GitHub Jul 17, 2025
Showing with 20 additions and 20 deletions

examples/jax/encoder/test_model_parallel_encoder.py examples/jax/encoder/test_model_parallel_encoder.py +11 -11

examples/jax/encoder/test_multigpu_encoder.py examples/jax/encoder/test_multigpu_encoder.py +9 -9

No files found.
--- a/examples/jax/encoder/test_model_parallel_encoder.py
+++ b/examples/jax/encoder/test_model_parallel_encoder.py
@@ -474,7 +474,7 @@ class TestEncoder(unittest.TestCase):
    def test_te_bf16(self):
        """Test Transformer Engine with BF16"""
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8(self):
@@ -482,7 +482,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    def test_te_mxfp8(self):
@@ -490,14 +490,14 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_bf16_supported(), "Device compute capability 8.0+ is required for BF16")
    def test_te_bf16_with_sp(self):
        """Test Transformer Engine with BF16 + SP"""
        self.args.enable_sp = True
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8_with_sp(self):
@@ -506,7 +506,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    def test_te_mxfp8_with_sp(self):
@@ -515,14 +515,14 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_bf16_supported(), "Device compute capability 8.0+ is required for BF16")
    def test_te_bf16_shardy(self):
        """Test Transformer Engine with BF16"""
        self.args.enable_shardy = True
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8_shardy(self):
@@ -531,7 +531,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8_with_sp_shardy(self):
@@ -541,7 +541,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    @unittest.skipIf(
@@ -553,7 +553,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    @unittest.skipIf(
@@ -566,7 +566,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.43 and actual[1] > 0.80
+        assert actual[0] < 0.39 and actual[1] > 0.83
 if __name__ == "__main__":

--- a/examples/jax/encoder/test_multigpu_encoder.py
+++ b/examples/jax/encoder/test_multigpu_encoder.py
@@ -435,13 +435,13 @@ class TestEncoder(unittest.TestCase):
    def setUp(self):
        """Run 5 epochs for testing"""
-        self.args = encoder_parser(["--epochs", "6"])
+        self.args = encoder_parser(["--epochs", "5"])
    @unittest.skipIf(not is_bf16_supported(), "Device compute capability 8.0+ is required for BF16")
    def test_te_bf16(self):
        """Test Transformer Engine with BF16"""
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8(self):
@@ -449,7 +449,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_current_scaling_fp8(self):
@@ -457,7 +457,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "Float8CurrentScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    def test_te_mxfp8(self):
@@ -465,14 +465,14 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_bf16_supported(), "Device compute capability 8.0+ is required for BF16")
    def test_te_bf16_shardy(self):
        """Test Transformer Engine with BF16"""
        self.args.enable_shardy = True
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_delayed_scaling_fp8_shardy(self):
@@ -481,7 +481,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "DelayedScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_fp8_supported, fp8_reason)
    def test_te_current_scaling_fp8_shardy(self):
@@ -490,7 +490,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "Float8CurrentScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
    @unittest.skipIf(not is_mxfp8_supported, mxfp8_reason)
    @unittest.skipIf(
@@ -502,7 +502,7 @@ class TestEncoder(unittest.TestCase):
        self.args.use_fp8 = True
        self.args.fp8_recipe = "MXFP8BlockScaling"
        actual = train_and_evaluate(self.args)
-        assert actual[0] < 0.50 and actual[1] > 0.75
+        assert actual[0] < 0.52 and actual[1] > 0.74
 if __name__ == "__main__":