Unverified commit 3d01e4a0, authored by ngimel, committed by GitHub

Merge pull request #400 from myleott/new_function_api

Update FusedLayerNorm for new function API
parents 574fe244 0dbf6c2a
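
For reference, the "new function API" targeted by this PR is PyTorch's static-method style for torch.autograd.Function: forward() and backward() become @staticmethods that receive a ctx object, non-tensor arguments are stashed on ctx instead of being passed to __init__, the function is invoked through .apply(...), and backward() must return one gradient (or None) per forward() argument. A minimal sketch of the pattern, using a made-up ScaleFunction purely for illustration (not part of apex):

import torch

class ScaleFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, scale):
        # Non-tensor arguments live on ctx rather than on an instance.
        ctx.scale = scale
        return input * scale

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient per forward() argument; None for the non-tensor scale.
        return grad_output * ctx.scale, None

x = torch.randn(4, requires_grad=True)
y = ScaleFunction.apply(x, 2.0)   # invoked via .apply, not by instantiating the Function
y.sum().backward()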
@@ -6,60 +6,66 @@ from torch.nn import init
 from torch.nn import functional as F
 import importlib

-class FusedLayerNormAffineFunction(torch.autograd.Function):
-    def __init__(self, normalized_shape, eps=1e-6):
-        global fused_layer_norm_cuda
-        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
-        self.normalized_shape = normalized_shape
-        self.eps = eps
+global fused_layer_norm_cuda
+fused_layer_norm_cuda = None

-    def forward(self, input, weight, bias):
+class FusedLayerNormAffineFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input, weight, bias, normalized_shape, eps):
+        global fused_layer_norm_cuda
+        if fused_layer_norm_cuda is None:
+            fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
+        ctx.normalized_shape = normalized_shape
+        ctx.eps = eps
         input_ = input.contiguous()
         weight_ = weight.contiguous()
         bias_ = bias.contiguous()
         output, mean, invvar = fused_layer_norm_cuda.forward_affine(
-            input_, self.normalized_shape, weight_, bias_, self.eps)
-        self.save_for_backward(input_, weight_, bias_, mean, invvar)
+            input_, ctx.normalized_shape, weight_, bias_, ctx.eps)
+        ctx.save_for_backward(input_, weight_, bias_, mean, invvar)
         return output

-    def backward(self, grad_output):
-        input_, weight_, bias_, mean, invvar = self.saved_tensors
+    @staticmethod
+    def backward(ctx, grad_output):
+        input_, weight_, bias_, mean, invvar = ctx.saved_tensors
         grad_input = grad_weight = grad_bias = None
         grad_input, grad_weight, grad_bias = fused_layer_norm_cuda.backward_affine(
             grad_output.contiguous(), mean, invvar,
-            input_, self.normalized_shape,
-            weight_, bias_, self.eps)
-        return grad_input, grad_weight, grad_bias;
+            input_, ctx.normalized_shape,
+            weight_, bias_, ctx.eps)
+        return grad_input, grad_weight, grad_bias, None, None

 class FusedLayerNormFunction(torch.autograd.Function):
-    def __init__(self, normalized_shape, eps=1e-6):
-        global fused_layer_norm_cuda
-        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
-        self.normalized_shape = normalized_shape
-        self.eps = eps

-    def forward(self, input):
+    @staticmethod
+    def forward(ctx, input, normalized_shape, eps):
+        global fused_layer_norm_cuda
+        if fused_layer_norm_cuda is None:
+            fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
+        ctx.normalized_shape = normalized_shape
+        ctx.eps = eps
         input_ = input.contiguous()
         output, mean, invvar = fused_layer_norm_cuda.forward(
-            input_, self.normalized_shape, self.eps)
-        self.save_for_backward(input_, mean, invvar)
+            input_, ctx.normalized_shape, ctx.eps)
+        ctx.save_for_backward(input_, mean, invvar)
         return output

-    def backward(self, grad_output):
-        input_, mean, invvar = self.saved_tensors
+    @staticmethod
+    def backward(ctx, grad_output):
+        input_, mean, invvar = ctx.saved_tensors
         grad_input = None
         grad_input = fused_layer_norm_cuda.backward(
             grad_output.contiguous(), mean, invvar,
-            input_, self.normalized_shape,
-            self.eps)
-        return grad_input
+            input_, ctx.normalized_shape,
+            ctx.eps)
+        return grad_input, None, None

 def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1e-6):
-    return FusedLayerNormAffineFunction(normalized_shape, eps)(input, weight, bias)
+    return FusedLayerNormAffineFunction.apply(input, weight, bias, normalized_shape, eps)

 def fused_layer_norm(input, normalized_shape, eps=1e-6):
-    return FusedLayerNormFunction(normalized_shape, eps)(input)
+    return FusedLayerNormFunction.apply(input, normalized_shape, eps)

 class FusedLayerNorm(torch.nn.Module):
     r"""Applies Layer Normalization over a mini-batch of inputs as described in
@@ -149,11 +155,10 @@ class FusedLayerNorm(torch.nn.Module):
             return F.layer_norm(
                 input, self.normalized_shape, self.weight, self.bias, self.eps)
         if self.elementwise_affine:
-            return FusedLayerNormAffineFunction(self.normalized_shape, self.eps)(
-                input, self.weight, self.bias)
+            return FusedLayerNormAffineFunction.apply(
+                input, self.weight, self.bias, self.normalized_shape, self.eps)
         else:
-            return FusedLayerNormFunction(self.normalized_shape, self.eps)(
-                input)
+            return FusedLayerNormFunction.apply(input, self.normalized_shape, self.eps)

     def extra_repr(self):
         return '{normalized_shape}, eps={eps}, ' \
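
After this change, callers go through the staticmethod .apply path (directly or via the module wrapper). A short usage sketch, assuming apex is installed with its CUDA extension built and a GPU is available, since fused_layer_norm_cuda only handles CUDA tensors:

import torch
from apex.normalization import FusedLayerNorm

x = torch.randn(16, 512, device="cuda", requires_grad=True)
ln = FusedLayerNorm(512).cuda()    # elementwise_affine=True by default
y = ln(x)                          # dispatches to FusedLayerNormAffineFunction.apply(...)
y.sum().backward()                 # gradients flow through the static backward()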