Commit 1b7b02ef authored by hubertlu-tw

Un-skip some tests and skip some flaky tests

parent 4cfbe05c
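The hunks below add a skipFlakyTest decorator imported from apex.testing.common_utils, which is not itself part of this commit. As a rough sketch only (the helper's real implementation, and the APEX_RUN_FLAKY_TESTS opt-in variable used here, are assumptions for illustration), such a decorator can be a thin wrapper over unittest's skip helpers that works on both test methods and whole test classes, matching how it is applied in the diffs:

# Hypothetical sketch of a skipFlakyTest helper; apex.testing.common_utils
# may implement it differently. APEX_RUN_FLAKY_TESTS is an assumed opt-in
# environment variable, not something this commit defines.
import os
import unittest

def skipFlakyTest(test_item):
    """Skip a test known to be flaky unless the user explicitly opts in."""
    run_flaky = os.environ.get("APEX_RUN_FLAKY_TESTS", "0") == "1"
    return unittest.skipUnless(run_flaky, "flaky test skipped")(test_item)

In the diffs it decorates both individual tests (test_loss_scale_decrease, test_with_bias) and an entire class (TestFusedLayerNormElemWiseBFloat16).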
@@ -8,7 +8,7 @@ import torch.optim as optim
 from apex import amp
 from utils import common_init, FLOAT
+from apex.testing.common_utils import skipFlakyTest
 class MyModel(torch.nn.Module):
     def __init__(self):
@@ -161,6 +161,7 @@ class TestCheckpointing(unittest.TestCase):
                 # skip tests for different opt_levels
                 continue
+    @skipFlakyTest
     def test_loss_scale_decrease(self):
         num_losses = 3
         nb_decrease_loss_scales = [0, 1, 2]
...
@@ -4,7 +4,7 @@ import unittest
 import torch
 import apex
+from apex.testing.common_utils import skipFlakyTest
 class TestFusedLayerNorm(unittest.TestCase):
     dtype = torch.float
@@ -188,6 +188,7 @@ class TestFusedRMSNormElemWiseHalf(TestFusedRMSNormElemWise):
         self.skipTest("Skip to save time")
+@skipFlakyTest
 class TestFusedLayerNormElemWiseBFloat16(TestFusedLayerNormElemWise):
     dtype = torch.bfloat16
     # NOTE (mkozuki): [BFloat16 Layer Norm flakiness]
...
@@ -7,7 +7,7 @@ import torch
 from torch import nn
 from apex.mlp import MLP
-from apex.testing.common_utils import skipIfRocm
+from apex.testing.common_utils import skipFlakyTest
 batch_size = 1024
 mlp_sizes = [480, 1024, 1024, 512, 256, 1]
@@ -18,7 +18,6 @@ class TestMLP(unittest.TestCase):
     def test_creation(self):
         MLP(mlp_sizes)
-    @skipIfRocm
     def test_numeric(self):
         mlp = MLP(mlp_sizes).cuda()
@@ -53,7 +52,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].bias.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_no_bias(self):
         for use_activation in ['none', 'relu', 'sigmoid']:
             mlp = MLP(mlp_sizes, bias=False, activation=use_activation).cuda()
@@ -91,7 +89,7 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].weight.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=100)
-    @skipIfRocm
+    @skipFlakyTest
     def test_with_bias(self):
         for use_activation in ['none', 'relu', 'sigmoid']:
             mlp = MLP(mlp_sizes, bias=True, activation=use_activation).cuda()
@@ -134,7 +132,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].bias.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_no_grad(self):
         mlp = MLP(mlp_sizes).cuda()
@@ -165,7 +162,6 @@ class TestMLP(unittest.TestCase):
             ref_mlp[0].weight.grad.detach().cpu().numpy(),
             atol=1e-7, rtol=1e-5)
-    @skipIfRocm
     def test_performance_half(self):
         mlp = MLP(mlp_sizes).cuda().half()
@@ -195,7 +191,7 @@ class TestMLP(unittest.TestCase):
         mlp.zero_grad()
         test_loss.backward()
-        torch.cuda.profiler.start()
+        #torch.cuda.profiler.start()
         torch.cuda.synchronize()
         start_time = time()
         for _ in range(num_iters):
@@ -217,7 +213,7 @@ class TestMLP(unittest.TestCase):
         torch.cuda.synchronize()
         stop_time = time()
         print(F"C++ MLP time {(stop_time - start_time) * 1000. / num_iters:.4f} ms")
-        torch.cuda.profiler.stop()
+        #torch.cuda.profiler.stop()
 if __name__ == '__main__':
     unittest.main()
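For reference, the timing pattern that survives in test_performance_half once the profiler calls are commented out is the usual synchronize-then-time idiom: synchronize before reading the wall clock and again after the loop so the measurement covers all queued CUDA work. A minimal standalone sketch (the matmul workload is a placeholder, not apex's MLP):

# Minimal sketch of the synchronize-then-time idiom; the matmul below is a
# placeholder workload, not the fused MLP exercised by the test.
from time import time
import torch

num_iters = 10
x = torch.randn(1024, 1024, device="cuda")

torch.cuda.synchronize()   # drain any pending kernels before timing
start_time = time()
for _ in range(num_iters):
    y = x @ x              # stand-in for the forward/backward pass
torch.cuda.synchronize()   # wait for the timed kernels to finish
stop_time = time()
print(f"avg iteration time {(stop_time - start_time) * 1000. / num_iters:.4f} ms")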