#!/bin/bash
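# run_8gpu_fp32.sh
# Launch the 8-GPU FP32 benchmark: export the MIOpen / PaddlePaddle / RCCL
# environment expected by the workers, then start run_benchmark_8gpu_fp32.sh
# through mpirun. SEED may be supplied by the caller, e.g.:
#   SEED=1234 bash run_8gpu_fp32.sh

# Stage the environment-initialization helper shipped with rundir_8gpu.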
cp rundir_8gpu/init_env.py .
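# MIOpen kernel-selection mode (1 = normal find).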
export MIOPEN_FIND_MODE=1
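# PaddlePaddle collective-training topology: 8 trainers, one local endpoint per rank.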
export PADDLE_TRAINERS_NUM=8
export PADDLE_TRAINER_ENDPOINTS=localhost:60005,localhost:60006,localhost:60007,localhost:60008,localhost:60009,localhost:60010,localhost:60011,localhost:60012
export PYTHON=python3
export SEED=${SEED:-"$RANDOM"}
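# ROCm / RCCL runtime settings: RCCL library from DTK 21.04, fine-grained
# PCIe memory, and the peer-to-peer level used for GPU-to-GPU transfers.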
export LD_LIBRARY_PATH=/opt/dtk-21.04/rccl/lib:$LD_LIBRARY_PATH
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_P2P_LEVEL=5
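# use_hierarchical_allreduce is forwarded to the benchmark script;
# num_process is the total number of MPI ranks started by mpirun below.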
export use_hierarchical_allreduce=True
export num_process=16

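# Build the mpirun launch command when more than one rank is requested;
# with a single rank only $CMD (if any) is executed below.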
if [[ $num_process -gt 1 ]]; then
  ORTERUN=$(which orterun)  # resolved here but not referenced below
  mpirun="mpirun --allow-run-as-root -np $num_process --bind-to none \
    -x PADDLE_TRAINERS_NUM -x PADDLE_TRAINER_ENDPOINTS -x LD_LIBRARY_PATH \
    -x SEED -x PYTHON -x NPROC_PER_NODE -x use_hierarchical_allreduce \
    ./run_benchmark_8gpu_fp32.sh"
else
  mpirun=""
fi

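# $CMD is not set in this script (it may come from the caller's environment);
# the benchmark command itself is embedded in $mpirun. The loop runs once,
# with 8 processes per node.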
echo "command is " $mpirun $CMD
for NPROC_PER_NODE in 8; do
  export NPROC_PER_NODE=$NPROC_PER_NODE
  $mpirun $CMD
done