zero gate update

89de2153 · Rick Ho · 94eca783 · 89de2153
Commit 89de2153 authored Feb 26, 2021 by Rick Ho
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

fmoe/gates.py fmoe/gates.py +6 -3

No files found.
--- a/fmoe/gates.py
+++ b/fmoe/gates.py
@@ -10,15 +10,18 @@ import torch.nn.functional as F
 class ZeroGate(nn.Module):
    def __init__(self, d_model, num_expert, world_size, top_k=2):
        super().__init__()
+        self.top_k = top_k

    def forward(self, inp):
        r'''
        The naive implementation simply calculates the top-k of a linear layer's
        output.
        '''
-        idx = torch.zeros(inp.shape[0], dtype=torch.int64, device=inp.device)
-        score = torch.ones(inp.shape[0], device=inp.device)
-        return idx, score.reshape(-1, 1, 1)
+        idx = torch.zeros(inp.shape[0] * self.top_k,
+                dtype=torch.int64, device=inp.device)
+        score = torch.ones(inp.shape[0] * self.top_k,
+                device=inp.device) / self.top_k
+        return idx, score.reshape(-1, 1, self.top_k)


 class NaiveGate(nn.Module):