Commit 3c53cf81 authored by Michael Carilli's avatar Michael Carilli
Browse files

Merge branch 'master' into prepare_fused

parents b7f10ad0 4dc711bc
...@@ -5,8 +5,9 @@ from .. import utils ...@@ -5,8 +5,9 @@ from .. import utils
MODULE = torch MODULE = torch
FP16_FUNCS = [ FP16_FUNCS = [
# Math # Low level functions wrapped by torch.nn layers.
# TODO: why are these in top-level torch namespace? # The wrapper layers contain the weights which are then passed in as a parameter
# to these functions.
'conv1d', 'conv1d',
'conv2d', 'conv2d',
'conv3d', 'conv3d',
...@@ -14,6 +15,7 @@ FP16_FUNCS = [ ...@@ -14,6 +15,7 @@ FP16_FUNCS = [
'conv_transpose2d', 'conv_transpose2d',
'conv_transpose3d', 'conv_transpose3d',
'conv_tbc', 'conv_tbc',
'prelu',
# BLAS # BLAS
'addmm', 'addmm',
...@@ -76,7 +78,6 @@ CASTS = [ ...@@ -76,7 +78,6 @@ CASTS = [
'addcmul', 'addcmul',
'atan2', 'atan2',
'cross', 'cross',
'prelu',
# Element-wise _or_ tensor-wise math # Element-wise _or_ tensor-wise math
'add', 'add',
......
...@@ -3,6 +3,7 @@ import torch ...@@ -3,6 +3,7 @@ import torch
import numbers import numbers
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from torch.nn import init from torch.nn import init
from torch.nn import functional as F
import importlib import importlib
class FusedLayerNormAffineFunction(torch.autograd.Function): class FusedLayerNormAffineFunction(torch.autograd.Function):
...@@ -144,6 +145,9 @@ class FusedLayerNorm(torch.nn.Module): ...@@ -144,6 +145,9 @@ class FusedLayerNorm(torch.nn.Module):
init.zeros_(self.bias) init.zeros_(self.bias)
def forward(self, input): def forward(self, input):
if not input.is_cuda:
return F.layer_norm(
input, self.normalized_shape, self.weight, self.bias, self.eps)
if self.elementwise_affine: if self.elementwise_affine:
return FusedLayerNormAffineFunction(self.normalized_shape,self.eps)( return FusedLayerNormAffineFunction(self.normalized_shape,self.eps)(
input, self.weight, self.bias) input, self.weight, self.bias)
......
import unittest
import os
import random
import torch
import apex
class TestFusedLayerNorm(unittest.TestCase):
    """Checks that FusedLayerNorm produces the same output on CPU
    (torch.nn.functional fallback path) and CUDA (fused kernel path).

    This variant runs without learnable affine parameters.
    Requires a CUDA-capable device.
    """

    def setUp(self):
        # elementwise_affine=False: no weight/bias parameters to move between devices.
        self.module = apex.normalization.FusedLayerNorm(
            normalized_shape=[32, 64], elementwise_affine=False)
        self.input_ = torch.randn(16, 32, 64)
        torch.cuda.manual_seed(42)

    def forward_cpu(self, input_):
        """Move the module to CPU and run it on a CPU copy of `input_`."""
        self.module.cpu()
        return self.module(input_.cpu())

    def forward_cuda(self, input_):
        """Move the module to GPU and run it on a GPU copy of `input_`."""
        self.module.cuda()
        return self.module(input_.cuda())

    def test_forward_cuda(self):
        out_ = self.forward_cuda(self.input_)
        # unittest assertions (not bare `assert`) survive `python -O`
        # and report useful diagnostics on failure.
        self.assertTrue(out_.is_cuda)

    def test_forward_cpu(self):
        out_ = self.forward_cpu(self.input_)
        self.assertFalse(out_.is_cuda)

    def test_same_output(self):
        out_cpu = self.forward_cpu(self.input_)
        out_cuda = self.forward_cuda(self.input_)
        # Compare on CPU; assert_allclose applies dtype-appropriate tolerances.
        torch.testing.assert_allclose(out_cpu, out_cuda.cpu())
class TestFusedLayerNormElemWise(TestFusedLayerNorm):
    """Same CPU-vs-CUDA comparison as TestFusedLayerNorm, but with
    learnable affine parameters (weight and bias) enabled."""

    def setUp(self):
        norm_shape = [32, 64]
        self.module = apex.normalization.FusedLayerNorm(
            normalized_shape=norm_shape, elementwise_affine=True)
        self.input_ = torch.randn(16, 32, 64)
        torch.cuda.manual_seed(42)
\ No newline at end of file
import unittest import unittest
import sys import sys
test_dirs = ["run_amp", "run_fp16util", "run_mixed_adam"] test_dirs = ["run_amp", "run_fp16util", "run_mixed_adam", "run_fused_layer_norm"]
runner = unittest.TextTestRunner(verbosity=2) runner = unittest.TextTestRunner(verbosity=2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment