Updated arguments for MOELinear.apply

8bac18dc · TiagoMAntunes · a3b2eb62 · 8bac18dc · 8bac18dc
Commit 8bac18dc authored Mar 23, 2021 by TiagoMAntunes
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 5 deletions

fmoe/functions.py fmoe/functions.py +4 -4

fmoe/layers.py fmoe/layers.py +1 -1

No files found.
--- a/fmoe/functions.py
+++ b/fmoe/functions.py
@@ -110,21 +110,21 @@ class MOELinear(Function):
    """

    @staticmethod
-    def forward(ctx, global_input_buf, weight, fwd_expert_count):
+    def forward(ctx, global_input_buf, fwd_expert_count, weight, bias=None):
        (global_output_buf,) = fmoe_cuda.forward(
            global_input_buf, weight, fwd_expert_count
        )
-        variables = (global_input_buf, weight, fwd_expert_count)
+        variables = (global_input_buf, fwd_expert_count, weight)
        ctx.save_for_backward(*variables)
        return global_output_buf

    @staticmethod
    def backward(ctx, grad_out):
-        (input_buf, weight, fwd_expert_count) = ctx.saved_tensors
+        (input_buf, fwd_expert_count, weight) = ctx.saved_tensors
        grad_inp_buf, grad_weight = fmoe_cuda.backward(
            grad_out, input_buf, weight, fwd_expert_count
        )
-        return grad_inp_buf, grad_weight, None
+        return grad_inp_buf, None, grad_weight


 class MOEGather(Function):

--- a/fmoe/layers.py
+++ b/fmoe/layers.py
@@ -41,7 +41,7 @@ class FMoELinear(nn.Module):
        r"""
        Call MOE function
        """
-        x = MOELinear.apply(inp, self.weight, fwd_expert_count)
+        x = MOELinear.apply(inp, fwd_expert_count, self.weight)
        if self.bias is not None:
            # TODO: torch.repeat_interleave seems to have numerical
            # instability in backward, leading to incorrect