Commit 7e311e4b authored by lcskrishna

Merge branch 'master' of https://github.com/ROCmSoftwarePlatform/apex into cl/fused-optimizers-bfp16
parents 98a64039 b2b55439
sudo docker build . --rm -t apex
sudo docker run -it -v $HOME:/data --rm --privileged --device=/dev/dri --device=/dev/kfd --network host --group-add video apex
ARG FROM_IMAGE=lcskrishna/rocm-pytorch:rocm3.3_ubuntu16.04_py3.6_pytorch_updated
FROM ${FROM_IMAGE}
RUN \
git clone --recursive https://github.com/ROCmSoftwarePlatform/apex.git && \
cd apex && \
python3.6 setup.py install --cpp_ext --cuda_ext
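
Once the image builds, a quick smoke test inside the container verifies that the extensions installed. The snippet below is illustrative, not part of the commit; amp_C is the fused-kernel module the unit tests further down import.

# Hypothetical post-install check: both imports must succeed for the
# fused-optimizer tests below to run.
import apex
from apex import amp   # AMP frontend exercised by the test suites
import amp_C           # fused kernels built by --cuda_ext
print("apex and amp_C imported OK")
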
@@ -18,3 +18,6 @@ from . import fp16_utils
from . import optimizers
from . import normalization
from . import pyprof
# Common utilities to run tests on ROCm.
from . import testing
'''
This file contains common utility functions for running the unit tests on ROCm.
'''
import torch
import os
import sys
from functools import wraps
import unittest
TEST_WITH_ROCM = os.getenv('APEX_TEST_WITH_ROCM', '0') == '1'
# Decorator to skip a unit test when running on ROCm.
def skipIfRocm(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
if TEST_WITH_ROCM:
raise unittest.SkipTest("test doesn't currently work on the ROCm stack.")
else:
fn(*args, **kwargs)
return wrapper
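
For illustration, a minimal sketch of how the decorator is used in a test module; the class and test names here are hypothetical:

import unittest
from apex.testing.common_utils import skipIfRocm

class ExampleTest(unittest.TestCase):
    @skipIfRocm  # skipped only when APEX_TEST_WITH_ROCM=1
    def test_cuda_only_path(self):
        self.assertTrue(True)

if __name__ == '__main__':
    unittest.main()
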
@@ -101,7 +101,7 @@ version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
if "--cuda_ext" in sys.argv:
from torch.utils.cpp_extension import CUDAExtension
sys.argv.remove("--cuda_ext")
is_rocm_pytorch = False
if torch.__version__ >= '1.5':
from torch.utils.cpp_extension import ROCM_HOME
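
The rest of this hunk is collapsed in this view; the usual pattern (an assumption, not visible here) derives the flag from the HIP build markers:

    # Assumed continuation: a PyTorch build is treated as ROCm when it
    # reports a HIP version and ROCM_HOME is set.
    is_rocm_pytorch = (torch.version.hip is not None) and (ROCM_HOME is not None)
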
@@ -155,8 +155,7 @@ if "--cuda_ext" in sys.argv:
'csrc/hip/multi_tensor_adagrad.hip',
'csrc/hip/multi_tensor_novograd.hip',
'csrc/hip/multi_tensor_lamb.hip'],
-                       extra_compile_args={'cxx' : ['-O3'] + version_dependent_macros,
-                                           'nvcc': []}))
+                       extra_compile_args=['-O3'] + version_dependent_macros))
if not is_rocm_pytorch:
ext_modules.append(
@@ -168,7 +167,7 @@ if "--cuda_ext" in sys.argv:
else:
print ("INFO: Skipping syncbn extension.")
if not is_rocm_pytorch:
ext_modules.append(
CUDAExtension(name='fused_layer_norm_cuda',
@@ -277,7 +276,7 @@ if "--deprecated_fused_lamb" in sys.argv:
'nvcc':['-O3',
'--use_fast_math'] + version_dependent_macros}))
# Check if ATen/CUDAGenerator.h is found; otherwise use the new ATen/CUDAGeneratorImpl.h, due to a breaking change in https://github.com/pytorch/pytorch/pull/36026.
generator_flag = []
torch_dir = torch.__path__[0]
if os.path.exists(os.path.join(torch_dir, 'include', 'ATen', 'CUDAGenerator.h')):
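
The body of this branch is collapsed below; a sketch of the conventional assignment, with the macro name treated as an assumption:

    generator_flag = ['-DOLD_GENERATOR']  # assumed macro; selects the old generator header
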
......
@@ -11,6 +11,8 @@ import torch.nn.functional as F
from utils import common_init, HALF, FLOAT,\
ALWAYS_HALF, ALWAYS_BFLOAT16, ALWAYS_FLOAT, MATCH_INPUT
from apex.testing.common_utils import skipIfRocm
def run_layer_test(test_case, fns, expected, input_shape, test_backward=True):
for fn, typ in it.product(fns, expected.keys()):
x = torch.randn(input_shape, dtype=typ).requires_grad_()
@@ -101,9 +103,11 @@ class TestBasicCastsBFloat16(_TestBasicCasts):
def tearDown(self):
self.handle._deactivate()
@skipIfRocm
def test_linear_is_bfloat16(self):
self._test_linear(ALWAYS_BFLOAT16)
@skipIfRocm
def test_conv2d_is_bfloat16(self):
self._test_conv2d(ALWAYS_BFLOAT16)
@@ -227,9 +231,11 @@ class TestTensorCastsBFloat16(_TestTensorCasts):
def tearDown(self):
self.handle._deactivate()
@skipIfRocm
def test_matmul_method_is_bfloat16(self):
self._test_matmul_method(ALWAYS_BFLOAT16)
@skipIfRocm
def test_matmul_op_is_bfloat16(self):
self._test_matmul_op(ALWAYS_BFLOAT16)
......
@@ -6,7 +6,7 @@ import torch.nn.functional as F
import torch.optim as optim
from apex import amp
from apex.testing.common_utils import skipIfRocm
from utils import common_init, FLOAT
@@ -161,6 +161,7 @@ class TestCheckpointing(unittest.TestCase):
# skip tests for different opt_levels
continue
@skipIfRocm
def test_loss_scale_decrease(self):
num_losses = 3
nb_decrease_loss_scales = [0, 1, 2]
......
@@ -13,6 +13,7 @@ from torch.nn import Parameter
from utils import common_init, HALF, FLOAT,\
ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT
from apex.testing.common_utils import skipIfRocm
try:
import amp_C
@@ -53,6 +54,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
pass
@unittest.skipIf(disabled, "amp_C is unavailable")
@skipIfRocm
def test_2models2losses1optimizer(self):
model0 = MyModel(1)
model1 = MyModel(2)
@@ -185,6 +187,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
_amp_state.handle._deactivate()
@unittest.skipIf(disabled, "amp_C is unavailable")
@skipIfRocm
def test_3models2losses1optimizer(self):
model0 = MyModel(1)
@@ -346,6 +349,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
_amp_state.handle._deactivate()
@unittest.skipIf(disabled, "amp_C is unavailable")
@skipIfRocm
def test_2models2losses2optimizers(self):
model0 = MyModel(1)
model1 = MyModel(2)
@@ -541,6 +545,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
_amp_state.handle._deactivate()
@unittest.skipIf(disabled, "amp_C is unavailable")
@skipIfRocm
def test_3models2losses2optimizers(self):
model0 = MyModel(1)
model1 = MyModel(2)
......
@@ -12,6 +12,8 @@ from math import floor
from utils import common_init, HALF, FLOAT,\
ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT
from apex.testing.common_utils import skipIfRocm
try:
import amp_C
from amp_C import multi_tensor_axpby
@@ -140,6 +142,7 @@ class TestMultiTensorAxpby(unittest.TestCase):
@unittest.skipIf(disabled, "amp_C is unavailable")
@unittest.skipIf(not try_nhwc, "torch version is 1.4 or earlier, may not support nhwc")
@skipIfRocm
def test_fuzz_nhwc(self):
input_size_pairs = (
((7, 77, 7, 77), (5, 55, 5, 55)),
......
@@ -11,6 +11,8 @@ import torch.nn.functional as F
from utils import common_init, HALF, FLOAT,\
ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT
from apex.testing.common_utils import skipIfRocm
try:
import amp_C
from amp_C import multi_tensor_l2norm
@@ -56,6 +58,7 @@ class TestMultiTensorL2Norm(unittest.TestCase):
self.assertTrue(self.overflow_buf.item() == 0)
@unittest.skipIf(disabled, "amp_C is unavailable")
@skipIfRocm
def test_fuzz(self):
input_size_pairs = (
(7777*77, 555*555),
......
@@ -13,6 +13,8 @@ from torch.nn import Parameter
from utils import common_init, HALF, FLOAT,\
ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT
from apex.testing.common_utils import skipIfRocm
class MyModel(torch.nn.Module):
def __init__(self, unique):
super(MyModel, self).__init__()
@@ -41,7 +43,8 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
def tearDown(self):
pass
@skipIfRocm
def test_2models2losses1optimizer(self):
model0 = MyModel(1)
model1 = MyModel(2)
@@ -167,6 +170,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
if opt_level == "O1":
_amp_state.handle._deactivate()
@skipIfRocm
def test_3models2losses1optimizer(self):
model0 = MyModel(1)
@@ -323,6 +327,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
if opt_level == "O1":
_amp_state.handle._deactivate()
@skipIfRocm
def test_2models2losses2optimizers(self):
model0 = MyModel(1)
model1 = MyModel(2)
@@ -513,6 +518,7 @@ class TestMultipleModelsOptimizersLosses(unittest.TestCase):
if opt_level == "O1":
_amp_state.handle._deactivate()
@skipIfRocm
def test_3models2losses2optimizers(self):
model0 = MyModel(1)
model1 = MyModel(2)
......
@@ -6,6 +6,7 @@ import torch
from torch import nn
from utils import common_init, HALF
from apex.testing.common_utils import skipIfRocm
class TestRnnCells(unittest.TestCase):
def setUp(self):
@@ -73,6 +74,7 @@ class TestRnns(unittest.TestCase):
output[-1, :, :].float().sum().backward()
self.assertEqual(x.grad.dtype, x.dtype)
@skipIfRocm
def test_rnn_is_half(self):
configs = [(1, False), (2, False), (2, True)]
for layers, bidir in configs:
@@ -80,6 +82,7 @@ class TestRnns(unittest.TestCase):
nonlinearity='relu', bidirectional=bidir)
self.run_rnn_test(rnn, layers, bidir)
@skipIfRocm
def test_gru_is_half(self):
configs = [(1, False), (2, False), (2, True)]
for layers, bidir in configs:
@@ -87,6 +90,7 @@ class TestRnns(unittest.TestCase):
bidirectional=bidir)
self.run_rnn_test(rnn, layers, bidir)
@skipIfRocm
def test_lstm_is_half(self):
configs = [(1, False), (2, False), (2, True)]
for layers, bidir in configs:
@@ -94,6 +98,7 @@ class TestRnns(unittest.TestCase):
bidirectional=bidir)
self.run_rnn_test(rnn, layers, bidir, state_tuple=True)
@skipIfRocm
def test_rnn_packed_sequence(self):
num_layers = 2
rnn = nn.RNN(input_size=self.h, hidden_size=self.h, num_layers=num_layers)
......
#!/bin/bash
APEX_TEST_WITH_ROCM=1 python3.6 run_test.py
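
The same variable gates a single suite; an illustrative Python equivalent of what run_test.py below does for one directory:

# Illustrative one-suite run, mirroring run_test.py.
import os, unittest
os.environ['APEX_TEST_WITH_ROCM'] = '1'   # must be set before test discovery
suite = unittest.TestLoader().discover('run_amp')
unittest.TextTestRunner(verbosity=2).run(suite)
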
import unittest
import sys
from apex.testing.common_utils import TEST_WITH_ROCM, skipIfRocm
test_dirs = ["run_amp", "run_fp16util", "run_optimizers", "run_fused_layer_norm", "run_pyprof_nvtx", "run_pyprof_data", "run_mlp"]
ROCM_BLACKLIST = [
'run_fused_layer_norm',
'run_pyprof_nvtx',
'run_pyprof_data',
'run_mlp'
]
runner = unittest.TextTestRunner(verbosity=2)
errcode = 0
for test_dir in test_dirs:
if (test_dir in ROCM_BLACKLIST) and TEST_WITH_ROCM:
continue
suite = unittest.TestLoader().discover(test_dir)
print("\nExecuting tests from " + test_dir)
......
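
The tail of the loop is collapsed above; a minimal sketch of the conventional ending, assuming failures set a nonzero exit code:

    # Sketch of the collapsed remainder: run the suite and record any failure.
    results = runner.run(suite)
    if not results.wasSuccessful():
        errcode = 1

sys.exit(errcode)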