Commit f804a121 authored by Rick Ho
Browse files

update test

parent 980cf4b6
......@@ -28,11 +28,12 @@ class BruteForceMoELinear(nn.Module):
self.top_k = top_k
def forward(self, inp, gate_idx, gate_score):
gate_long = gate_idx.long()
inp = inp.repeat_interleave(repeats=self.top_k, dim=0)
gate_long = gate_idx.long().view(-1)
batch_size = inp.size(0)
o = torch.empty(batch_size, self.d_model, dtype=inp.dtype, device=inp.device)
for i in range(self.weight_htoh4.shape[0]):
idx = gate_idx == i
idx = gate_long == i
x = inp[idx]
x = x @ self.weight_htoh4[i].t()
x = x + self.bias_htoh4[i]
......@@ -56,7 +57,8 @@ class BruteForceMoE(nn.Module):
self.experts = [expert(d_model) for _ in range(num_expert * world_size)]
def forward(self, inp, gate_idx, gate_score):
gate_long = gate_idx.long()
inp = inp.repeat_interleave(repeats=self.top_k, dim=0)
gate_long = gate_idx.long().view(-1)
batch_size = inp.size(0)
x = inp.new_zeros((batch_size, self.d_model))
for i in range(batch_size):
......
......@@ -58,5 +58,5 @@ def test_switch_gate(d_model, batch_size, n_expert, cap):
if __name__ == '__main__':
_ensure_initialized()
# test_gshard_gate(4096, 1024, 4, .2)
test_switch_gate(4096, 1024, 4, .2)
test_gshard_gate(4096, 1024, 4, .2)
# test_switch_gate(4096, 1024, 4, .2)
......@@ -39,9 +39,8 @@ def _perform_forward(
inp_raw.requires_grad = True
gate_idx, gate_score = moe.gate(inp_raw)
inp_repeated = inp_raw.repeat_interleave(repeats=top_k, dim=0)
moe_out = moe(inp)
raw_out = moe_raw(inp_repeated, gate_idx, gate_score)
raw_out = moe_raw(inp_raw, gate_idx, gate_score)
raw_out.mean().backward()
moe_out.mean().backward()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment