Commit 1b7b02ef authored by hubertlu-tw

Un-skip some tests and skip some flaky tests

parent 4cfbe05c
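The hunks below add a skipFlakyTest decorator imported from apex.testing.common_utils, which is not itself part of this commit. As a rough sketch only (the helper's real implementation, and the APEX_RUN_FLAKY_TESTS opt-in variable used here, are assumptions for illustration), such a decorator can be a thin wrapper over unittest's skip helpers that works on both test methods and whole test classes, matching how it is applied in the diffs:

# Hypothetical sketch of a skipFlakyTest helper; apex.testing.common_utils
# may implement it differently. APEX_RUN_FLAKY_TESTS is an assumed opt-in
# environment variable, not something this commit defines.
import os
import unittest

def skipFlakyTest(test_item):
    """Skip a test known to be flaky unless the user explicitly opts in."""
    run_flaky = os.environ.get("APEX_RUN_FLAKY_TESTS", "0") == "1"
    return unittest.skipUnless(run_flaky, "flaky test skipped")(test_item)

In the diffs it decorates both individual tests (test_loss_scale_decrease, test_with_bias) and an entire class (TestFusedLayerNormElemWiseBFloat16).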
@@ -8,7 +8,7 @@ import torch.optim as optim
 from apex import amp
 from utils import common_init, FLOAT
+from apex.testing.common_utils import skipFlakyTest
 class MyModel(torch.nn.Module):
     def __init__(self):
@@ -161,6 +161,7 @@ class TestCheckpointing(unittest.TestCase):
                 # skip tests for different opt_levels
                 continue
+    @skipFlakyTest
     def test_loss_scale_decrease(self):
         num_losses = 3
         nb_decrease_loss_scales = [0, 1, 2]
...
@@ -4,7 +4,7 @@ import unittest
 import torch
 import apex
+from apex.testing.common_utils import skipFlakyTest
 class TestFusedLayerNorm(unittest.TestCase):
     dtype = torch.float
@@ -188,6 +188,7 @@ class TestFusedRMSNormElemWiseHalf(TestFusedRMSNormElemWise):
         self.skipTest("Skip to save time")
+@skipFlakyTest
 class TestFusedLayerNormElemWiseBFloat16(TestFusedLayerNormElemWise):
     dtype = torch.bfloat16
     # NOTE (mkozuki): [BFloat16 Layer Norm flakiness]
...
@@ -7,7 +7,7 @@ import torch
 from torch import nn
 from apex.mlp import MLP
-from apex.testing.common_utils import skipIfRocm
+from apex.testing.common_utils import skipFlakyTest
 batch_size = 1024
 mlp_sizes = [480, 1024, 1024, 512, 256, 1]
@@ -18,7 +18,6 @@ class TestMLP(unittest.TestCase):
     def test_creation(self):
         MLP(mlp_sizes)
-    @skipIfRocm
     def test_numeric(self):
         mlp = MLP(mlp_sizes).cuda()
@@ -53,7 +52,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].bias.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_no_bias(self):
         for use_activation in ['none', 'relu', 'sigmoid']:
             mlp = MLP(mlp_sizes, bias=False, activation=use_activation).cuda()
@@ -91,7 +89,7 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].weight.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=100)
-    @skipIfRocm
+    @skipFlakyTest
     def test_with_bias(self):
         for use_activation in ['none', 'relu', 'sigmoid']:
             mlp = MLP(mlp_sizes, bias=True, activation=use_activation).cuda()
@@ -134,7 +132,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].bias.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_no_grad(self):
         mlp = MLP(mlp_sizes).cuda()
@@ -165,7 +162,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].weight.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_performance_half(self):
         mlp = MLP(mlp_sizes).cuda().half()
@@ -195,7 +191,7 @@ class TestMLP(unittest.TestCase):
         mlp.zero_grad()
         test_loss.backward()
-        torch.cuda.profiler.start()
+        #torch.cuda.profiler.start()
         torch.cuda.synchronize()
         start_time = time()
         for _ in range(num_iters):
@@ -217,7 +213,7 @@ class TestMLP(unittest.TestCase):
         torch.cuda.synchronize()
         stop_time = time()
         print(F"C++ MLP time {(stop_time - start_time) * 1000. / num_iters:.4f} ms")
-        torch.cuda.profiler.stop()
+        #torch.cuda.profiler.stop()
 if __name__ == '__main__':
     unittest.main()
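For reference, the timing pattern that survives in test_performance_half once the profiler calls are commented out is the usual synchronize-then-time idiom: synchronize before reading the wall clock and again after the loop so the measurement covers all queued CUDA work. A minimal standalone sketch (the matmul workload is a placeholder, not apex's MLP):

# Minimal sketch of the synchronize-then-time idiom; the matmul below is a
# placeholder workload, not the fused MLP exercised by the test.
from time import time
import torch

num_iters = 10
x = torch.randn(1024, 1024, device="cuda")

torch.cuda.synchronize()   # drain any pending kernels before timing
start_time = time()
for _ in range(num_iters):
    y = x @ x              # stand-in for the forward/backward pass
torch.cuda.synchronize()   # wait for the timed kernels to finish
stop_time = time()
print(f"avg iteration time {(stop_time - start_time) * 1000. / num_iters:.4f} ms")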