OpenDAS / FastMoE · Commits

Commit 0ec96584 (Unverified), authored Aug 04, 2021 by Rick Ho, committed by GitHub on Aug 04, 2021

Merge pull request #66 from laekov/slurm-benchmarks

Update benchmark according to update of gates

Parents: f93ad285, 98584dd0
Changes: 2 changed files, with 22 additions and 28 deletions

tests/benchmark_mlp.py  +1  -6
tests/test.sh           +21 -22
tests/benchmark_mlp.py
```diff
@@ -40,7 +40,7 @@ class BruteForceMoE(nn.Module):
     def forward(self, inp):
         if self.pre_lnorm:
             inp = self.layer_norm(inp)
-        gate_top_k_idx, gate_score, _ = self.gate(inp)
+        gate_top_k_idx, gate_score = self.gate(inp)
         inp = inp.repeat_interleave(repeats=self.top_k, dim=0)
         x = self.mlp(inp, gate_top_k_idx, gate_score)
         if not self.pre_lnorm:
```
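The one-line change above tracks the gate update named in the commit title: `self.gate(inp)` now returns two values, the top-k expert indices and their scores, where it previously returned a third value that the benchmark discarded. As a rough sketch of the two-value contract the call site now assumes (an illustrative gate, not FastMoE's actual implementation; the class name, shapes, and softmax placement are assumptions):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TopKGate(nn.Module):
    """Illustrative two-output gate: forward(inp) returns
    (top-k expert indices, normalized scores) and nothing else,
    matching `gate_top_k_idx, gate_score = self.gate(inp)`."""

    def __init__(self, d_model, num_expert, top_k):
        super().__init__()
        self.proj = nn.Linear(d_model, num_expert)
        self.top_k = top_k

    def forward(self, inp):
        logits = self.proj(inp)                    # (batch, num_expert)
        val, idx = torch.topk(logits, self.top_k)  # both (batch, top_k)
        score = F.softmax(val, dim=-1)             # normalize over the k experts
        return idx, score                          # two values, no auxiliary output
```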
```diff
@@ -126,11 +126,6 @@ def benchmark_mlp(MOELayer, batch_size, in_feat, hidden_feat, num_expert, top_k)
 if __name__ == "__main__":
-    os.environ["RANK"] = os.environ.get("OMPI_COMM_WORLD_RANK", "0")
-    os.environ["WORLD_SIZE"] = os.environ.get("OMPI_COMM_WORLD_SIZE", "1")
-    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK", "0")
     if int(os.environ["WORLD_SIZE"]) > 1:
         torch.distributed.init_process_group(backend="nccl")
         rank = torch.distributed.get_rank()
```
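The deleted lines above had benchmark_mlp.py translate the OpenMPI variables into `RANK`, `WORLD_SIZE`, and `CUDA_VISIBLE_DEVICES` itself; after this commit that responsibility moves to tests/test.sh, and the benchmark relies on `torch.distributed.init_process_group` reading the environment directly. A minimal sketch of the flow the benchmark now assumes (the fallback values here are illustrative, not part of the benchmark):

```python
import os
import torch

# tests/test.sh is expected to export RANK, WORLD_SIZE, MASTER_ADDR, and
# MASTER_PORT before launching; the defaults below are only illustrative
# fallbacks for a bare single-process run.
os.environ.setdefault("MASTER_ADDR", "localhost")
os.environ.setdefault("MASTER_PORT", "36666")

if int(os.environ.get("WORLD_SIZE", "1")) > 1:
    # The default init_method is "env://", which reads MASTER_ADDR,
    # MASTER_PORT, RANK, and WORLD_SIZE from the environment.
    torch.distributed.init_process_group(backend="nccl")
    rank = torch.distributed.get_rank()
else:
    rank = 0
```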
tests/test.sh
```diff
 #!/bin/bash
-if [ -z $MASTER_ADDR ]
+if [ ! -z $OMPI_COMM_WORLD_LOCAL_RANK ]
 then
-    export MASTER_ADDR=localhost
+    export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
+    if [ -z $SLURM_JOB_ID ]
+    then
+        export MASTER_ADDR=localhost
+    else
+        export MASTER_ADDR=$(scontrol show JobId=$SLURM_JOB_ID | grep BatchHost | tr '=' ' ' | awk '{print $2}')
+    fi
 fi
 if [ -z $MASTER_PORT ]
 then
-    export MASTER_PORT=12215
+    export MASTER_PORT=36666
 fi
-if [ -z $OMPI_COMM_WORLD_RANK ]
+if [ ! -z $OMPI_COMM_WORLD_RANK ]
+then
+    RANK=$OMPI_COMM_WORLD_RANK
+    localrank=$OMPI_COMM_WORLD_LOCAL_RANK
+elif [ ! -z $SLURM_PROCID ]
 then
-    RANK=single
+    export RANK=$SLURM_PROCID
+    export WORLD_SIZE=$SLURM_NPROCS
+    localrank=$SLURM_LOCALID
 else
-    RANK=$OMPI_COMM_WORLD_RANK
+    RANK=0
+    localrank=0
+    WORLD_SIZE=1
 fi
-mkdir -p logs
-PYTHON_EXEC=python3
-PYTHON_VERSION=$($PYTHON_EXEC --version)
-PYTHON_REVISION=${PYTHON_VERSION:7:3}
-SCRIPT_PATH=$(dirname $(dirname $(realpath $0)))
-source ~/scripts/torch.env
-export PYTHONPATH=$SCRIPT_PATH:$SCRIPT_PATH/build/lib.linux-x86_64-$PYTHON_REVISION:$PYTHONPATH
-core0=$(expr $OMPI_COMM_WORLD_LOCAL_RANK \* 4)
-cores=$core0-$(expr $core0 + 3)
-exec numactl -C $cores $PYTHON_EXEC $@ 2>logs/$RANK.log
+export CUDA_VISIBLE_DEVICES=$localrank
+exec $@
```
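The rewritten script drops the machine-specific setup (the hard-coded `~/scripts/torch.env`, PYTHONPATH surgery, and numactl pinning) and instead detects the launcher from the environment: `mpirun` sets `OMPI_COMM_WORLD_*`, `srun` sets `SLURM_*`, and a bare invocation falls back to a single process. A Python sketch of the same detection order, should it need to be reproduced without the shell wrapper (the helper name is hypothetical):

```python
import os

def detect_launcher_env():
    """Hypothetical helper mirroring tests/test.sh: resolve
    (rank, local_rank, world_size) from whichever launcher ran us."""
    env = os.environ
    if "OMPI_COMM_WORLD_RANK" in env:      # launched by mpirun (OpenMPI)
        return (int(env["OMPI_COMM_WORLD_RANK"]),
                int(env["OMPI_COMM_WORLD_LOCAL_RANK"]),
                int(env.get("OMPI_COMM_WORLD_SIZE", "1")))
    if "SLURM_PROCID" in env:              # launched by srun (SLURM)
        return (int(env["SLURM_PROCID"]),
                int(env["SLURM_LOCALID"]),
                int(env["SLURM_NPROCS"]))
    return (0, 0, 1)                       # plain single-process run
```

In typical use the wrapper stays in charge: something like `mpirun -np 4 tests/test.sh python3 tests/benchmark_mlp.py`, or the `srun` equivalent, launches the benchmark with the environment already prepared.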