"src/vscode:/vscode.git/clone" did not exist on "58bf2682612bc29b7cdb8a10ba6eee28a024d6d3"
Commit 98584dd0 authored by Rick Ho

update benchmark accordingly

parent bba5f289
@@ -40,7 +40,7 @@ class BruteForceMoE(nn.Module):
     def forward(self, inp):
         if self.pre_lnorm:
             inp = self.layer_norm(inp)
-        gate_top_k_idx, gate_score, _ = self.gate(inp)
+        gate_top_k_idx, gate_score = self.gate(inp)
         inp = inp.repeat_interleave(repeats=self.top_k, dim=0)
         x = self.mlp(inp, gate_top_k_idx, gate_score)
         if not self.pre_lnorm:
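The hunk above tracks an updated gate interface: the benchmark's BruteForceMoE now expects self.gate(inp) to return two values (expert indices and scores) instead of three, with the old trailing value previously discarded as _. As a rough illustration only, not FastMoE's actual gate, a minimal top-k softmax gate with the two-value signature could look like this (the names NaiveTopKGate, d_model and num_expert are invented here):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class NaiveTopKGate(nn.Module):
        # Illustrative gate returning exactly two values, matching the
        # updated call site: gate_top_k_idx, gate_score = self.gate(inp)
        def __init__(self, d_model, num_expert, top_k):
            super().__init__()
            self.linear = nn.Linear(d_model, num_expert)
            self.top_k = top_k

        def forward(self, inp):
            logits = self.linear(inp)                         # (batch, num_expert)
            top_val, top_idx = torch.topk(logits, k=self.top_k, dim=-1)
            score = F.softmax(top_val, dim=-1)                # normalize over the k chosen experts
            return top_idx, score                             # two values, no auxiliary output

    gate = NaiveTopKGate(d_model=16, num_expert=4, top_k=2)
    idx, score = gate(torch.randn(8, 16))                     # idx and score: both (8, 2)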
@@ -126,11 +126,6 @@ def benchmark_mlp(MOELayer, batch_size, in_feat, hidden_feat, num_expert, top_k)
 if __name__ == "__main__":
-    os.environ["RANK"] = os.environ.get("OMPI_COMM_WORLD_RANK", "0")
-    os.environ["WORLD_SIZE"] = os.environ.get("OMPI_COMM_WORLD_SIZE", "1")
-    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get(
-        "OMPI_COMM_WORLD_LOCAL_RANK", "0"
-    )
     if int(os.environ["WORLD_SIZE"]) > 1:
         torch.distributed.init_process_group(backend="nccl")
         rank = torch.distributed.get_rank()
...
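The deleted lines used to default RANK, WORLD_SIZE and CUDA_VISIBLE_DEVICES from OpenMPI's environment inside the Python entry point; this commit moves that bootstrapping into the launcher script below. With the default env:// rendezvous, torch.distributed.init_process_group reads MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE from the environment, so the surviving Python side reduces to roughly this sketch (the single-process fallback and the print are illustrative):

    import os
    import torch

    # The launcher exports MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE;
    # the default env:// init method picks them up without explicit arguments.
    if int(os.environ.get("WORLD_SIZE", "1")) > 1:
        torch.distributed.init_process_group(backend="nccl")
        rank = torch.distributed.get_rank()
    else:
        rank = 0
    print(f"benchmark running as rank {rank}")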
 #!/bin/bash
-if [ ! -z $OMPI_COMM_WORLD_LOCAL_RANK ]
+if [ -z $MASTER_ADDR ]
 then
-    export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
+    if [ -z $SLURM_JOB_ID ]
+    then
+        export MASTER_ADDR=localhost
+    else
+        export MASTER_ADDR=$(scontrol show JobId=$SLURM_JOB_ID | grep BatchHost | tr '=' ' ' | awk '{print $2}')
+    fi
 fi
 if [ -z $MASTER_PORT ]
 then
-    export MASTER_ADDR=localhost
-    export MASTER_PORT=36666
+    export MASTER_PORT=12215
 fi
-if [ -z $OMPI_COMM_WORLD_RANK ]
+if [ ! -z $OMPI_COMM_WORLD_RANK ]
 then
-    RANK=single
-else
     RANK=$OMPI_COMM_WORLD_RANK
+    localrank=$OMPI_COMM_WORLD_LOCAL_RANK
+elif [ ! -z $SLURM_PROCID ]
+then
+    export RANK=$SLURM_PROCID
+    export WORLD_SIZE=$SLURM_NPROCS
+    localrank=$SLURM_LOCALID
+else
+    RANK=0
+    localrank=0
+    WORLD_SIZE=1
 fi
-mkdir -p logs
-PYTHON_EXEC=python3
-PYTHON_VERSION=$($PYTHON_EXEC --version)
-PYTHON_REVISION=${PYTHON_VERSION:7:3}
-SCRIPT_PATH=$(dirname $(dirname $(realpath $0)))
-source ~/scripts/torch.env
-export PYTHONPATH=$SCRIPT_PATH:$SCRIPT_PATH/build/lib.linux-x86_64-$PYTHON_REVISION:$PYTHONPATH
-core0=$(expr $OMPI_COMM_WORLD_LOCAL_RANK \* 4)
-cores=$core0-$(expr $core0 + 3)
-exec numactl -C $cores $PYTHON_EXEC $@ 2>logs/$RANK.log
+export CUDA_VISIBLE_DEVICES=$localrank
+exec $@
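The rewritten launcher resolves addressing and ranks from whichever scheduler is present: MASTER_ADDR comes from Slurm's BatchHost (or localhost), and rank/local rank come from OpenMPI variables first, then Slurm, then a single-process fallback, before pinning each process to one GPU via CUDA_VISIBLE_DEVICES and exec-ing the wrapped command. A hand-rolled Python mirror of that precedence, for illustration only (resolve_ranks is not part of the repository, and deriving the world size from OMPI_COMM_WORLD_SIZE is an assumption; the script itself does not export WORLD_SIZE in the OpenMPI branch):

    import os

    def resolve_ranks(env=os.environ):
        # Same precedence as the launcher: OpenMPI, then Slurm, then single process.
        if "OMPI_COMM_WORLD_RANK" in env:
            rank = int(env["OMPI_COMM_WORLD_RANK"])
            world_size = int(env.get("OMPI_COMM_WORLD_SIZE", "1"))  # assumption, see above
            local_rank = int(env["OMPI_COMM_WORLD_LOCAL_RANK"])
        elif "SLURM_PROCID" in env:
            rank = int(env["SLURM_PROCID"])
            world_size = int(env["SLURM_NPROCS"])
            local_rank = int(env["SLURM_LOCALID"])
        else:
            rank, world_size, local_rank = 0, 1, 0
        return rank, world_size, local_rank

    rank, world_size, local_rank = resolve_ranks()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(local_rank)  # one GPU per process, as in the script

The script wraps the actual benchmark command (exec $@), e.g. when launched under mpirun or srun, so the Python process starts with its environment already prepared.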