Commit 3c53cf81 authored by Michael Carilli's avatar Michael Carilli
Browse files

Merge branch 'master' into prepare_fused

parents b7f10ad0 4dc711bc
...@@ -5,8 +5,9 @@ from .. import utils ...@@ -5,8 +5,9 @@ from .. import utils
MODULE = torch MODULE = torch
FP16_FUNCS = [ FP16_FUNCS = [
# Math # Low level functions wrapped by torch.nn layers.
# TODO: why are these in top-level torch namespace? # The wrapper layers contain the weights which are then passed in as a parameter
# to these functions.
'conv1d', 'conv1d',
'conv2d', 'conv2d',
'conv3d', 'conv3d',
...@@ -14,6 +15,7 @@ FP16_FUNCS = [ ...@@ -14,6 +15,7 @@ FP16_FUNCS = [
'conv_transpose2d', 'conv_transpose2d',
'conv_transpose3d', 'conv_transpose3d',
'conv_tbc', 'conv_tbc',
'prelu',
# BLAS # BLAS
'addmm', 'addmm',
...@@ -76,7 +78,6 @@ CASTS = [ ...@@ -76,7 +78,6 @@ CASTS = [
'addcmul', 'addcmul',
'atan2', 'atan2',
'cross', 'cross',
'prelu',
# Element-wise _or_ tensor-wise math # Element-wise _or_ tensor-wise math
'add', 'add',
......
...@@ -3,6 +3,7 @@ import torch ...@@ -3,6 +3,7 @@ import torch
import numbers import numbers
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from torch.nn import init from torch.nn import init
from torch.nn import functional as F
import importlib import importlib
class FusedLayerNormAffineFunction(torch.autograd.Function): class FusedLayerNormAffineFunction(torch.autograd.Function):
...@@ -144,6 +145,9 @@ class FusedLayerNorm(torch.nn.Module): ...@@ -144,6 +145,9 @@ class FusedLayerNorm(torch.nn.Module):
init.zeros_(self.bias) init.zeros_(self.bias)
def forward(self, input): def forward(self, input):
if not input.is_cuda:
return F.layer_norm(
input, self.normalized_shape, self.weight, self.bias, self.eps)
if self.elementwise_affine: if self.elementwise_affine:
return FusedLayerNormAffineFunction(self.normalized_shape,self.eps)( return FusedLayerNormAffineFunction(self.normalized_shape,self.eps)(
input, self.weight, self.bias) input, self.weight, self.bias)
......
import unittest
import os
import random
import torch
import apex
class TestFusedLayerNorm(unittest.TestCase):
    """Checks that FusedLayerNorm produces the same output on CPU
    (torch.nn.functional fallback path) and CUDA (fused kernel path).

    This variant runs without learnable affine parameters.
    Requires a CUDA-capable device.
    """

    def setUp(self):
        # elementwise_affine=False: no weight/bias parameters to move between devices.
        self.module = apex.normalization.FusedLayerNorm(
            normalized_shape=[32, 64], elementwise_affine=False)
        self.input_ = torch.randn(16, 32, 64)
        torch.cuda.manual_seed(42)

    def forward_cpu(self, input_):
        """Move the module to CPU and run it on a CPU copy of `input_`."""
        self.module.cpu()
        return self.module(input_.cpu())

    def forward_cuda(self, input_):
        """Move the module to GPU and run it on a GPU copy of `input_`."""
        self.module.cuda()
        return self.module(input_.cuda())

    def test_forward_cuda(self):
        out_ = self.forward_cuda(self.input_)
        # unittest assertions (not bare `assert`) survive `python -O`
        # and report useful diagnostics on failure.
        self.assertTrue(out_.is_cuda)

    def test_forward_cpu(self):
        out_ = self.forward_cpu(self.input_)
        self.assertFalse(out_.is_cuda)

    def test_same_output(self):
        out_cpu = self.forward_cpu(self.input_)
        out_cuda = self.forward_cuda(self.input_)
        # Compare on CPU; assert_allclose applies dtype-appropriate tolerances.
        torch.testing.assert_allclose(out_cpu, out_cuda.cpu())
class TestFusedLayerNormElemWise(TestFusedLayerNorm):
    """Same CPU-vs-CUDA comparison as TestFusedLayerNorm, but with
    learnable affine parameters (weight and bias) enabled."""

    def setUp(self):
        norm_shape = [32, 64]
        self.module = apex.normalization.FusedLayerNorm(
            normalized_shape=norm_shape, elementwise_affine=True)
        self.input_ = torch.randn(16, 32, 64)
        torch.cuda.manual_seed(42)
\ No newline at end of file
import unittest import unittest
import sys import sys
test_dirs = ["run_amp", "run_fp16util", "run_mixed_adam"] test_dirs = ["run_amp", "run_fp16util", "run_mixed_adam", "run_fused_layer_norm"]
runner = unittest.TextTestRunner(verbosity=2) runner = unittest.TextTestRunner(verbosity=2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment