Commit 184b0404 authored by Sengxian

Fix bug in DDP test

parent 5ead59db
@@ -157,12 +157,12 @@ def test_fmoe(
         )
         para_array = [torch.empty_like(para_tensor) for _ in range(world_size)]
         torch.distributed.all_gather(para_array, para_tensor)
-        para_tesnor_gathered = torch.cat(para_array, dim=0)
-        assert len(para_array) == len(moe_raw.experts)
-        for expertID in range(para_tesnor_gathered.shape[0]):
-            list(moe_raw.experts[expertID].parameters())[idx].data = para_tensor[
-                expertID
-            ]
+        para_tensor_gathered = torch.cat(para_array, dim=0)
+        assert para_tensor_gathered.shape[0] == len(moe_raw.experts)
+        for expertID in range(para_tensor_gathered.shape[0]):
+            list(moe_raw.experts[expertID].parameters())[
+                idx
+            ].data = para_tensor_gathered[expertID]
         moe_out, raw_out = _perform_forward(moe, moe_raw, batch_size, d_model, top_k)
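
Note on the first hunk: the old code gathered every rank's expert parameters into para_array but then copied from the local para_tensor (and compared len(para_array), which is the world size, against the expert count). The fix indexes the concatenated para_tensor_gathered instead, so each raw expert receives the weights owned by the corresponding rank. Below is a minimal sketch of the gather pattern, assuming the process group is already initialized and each rank holds its local expert weights in one tensor; gather_expert_weights and local_weight are illustrative names, not part of the test.

# A minimal sketch of the all_gather pattern the fixed test relies on.
# Assumes torch.distributed is already initialized; names are hypothetical.
import torch
import torch.distributed as dist

def gather_expert_weights(local_weight: torch.Tensor) -> torch.Tensor:
    world_size = dist.get_world_size()
    # all_gather fills buckets[i] with rank i's tensor, on every rank.
    buckets = [torch.empty_like(local_weight) for _ in range(world_size)]
    dist.all_gather(buckets, local_weight)
    # Concatenating along dim 0 makes index e address global expert e;
    # the bug fixed above was indexing the local tensor (para_tensor)
    # instead of this gathered result.
    return torch.cat(buckets, dim=0)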
@@ -202,10 +202,10 @@ def _run_distributed(func: Callable, args: Dict):
     ps, n = [], 2
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = "36666"
-    os.environ["WORLD_SIZE"] = str(n)
+    os.environ["OMPI_COMM_WORLD_SIZE"] = str(n)
     for i in range(n):
-        os.environ["RANK"] = str(i)
+        os.environ["OMPI_COMM_WORLD_RANK"] = str(i)
         os.environ["CUDA_VISIBLE_DEVICES"] = str(i)
         p = subprocess.Popen(
             [sys.executable, __file__, func.__name__, json.dumps(args)],
...
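
Note on the second hunk: the launcher now exports the MPI-style OMPI_COMM_WORLD_SIZE / OMPI_COMM_WORLD_RANK variables, presumably because the spawned workers' distributed initialization reads those rather than WORLD_SIZE / RANK. A minimal sketch of the same one-process-per-rank launch pattern follows, with hypothetical names (launch_workers, worker.py) and the per-rank variables passed via env= rather than by mutating os.environ.

# A minimal sketch, assuming each worker initializes torch.distributed from
# these variables and then calls the entry function named on its command line.
import json
import os
import subprocess
import sys

def launch_workers(n: int, entry: str, args: dict) -> None:
    procs = []
    for rank in range(n):
        env = dict(
            os.environ,
            MASTER_ADDR="localhost",
            MASTER_PORT="36666",
            OMPI_COMM_WORLD_SIZE=str(n),
            OMPI_COMM_WORLD_RANK=str(rank),
            CUDA_VISIBLE_DEVICES=str(rank),  # pin one GPU per rank
        )
        procs.append(
            subprocess.Popen(
                [sys.executable, "worker.py", entry, json.dumps(args)],
                env=env,
            )
        )
    for p in procs:
        assert p.wait() == 0  # surface any rank's failure in the test

Passing env= keeps each child's rank-specific settings isolated. The original's in-place mutation of os.environ also works, since Popen snapshots the environment at spawn time, but it would be fragile if launches ever overlapped.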