Commit fdb051c1 authored by zhaoying1
Browse files

Update run_train_single.sh

parent 410643b6
......@@ -14,7 +14,7 @@ lrank=$OMPI_COMM_WORLD_LOCAL_RANK
# Log the node-local rank held in ${lrank} so each process's output can be told apart.
echo "LRANK===============================${lrank}"

# Global rank and total process count, copied from the OMPI_COMM_WORLD_*
# environment variables.
RANK="${OMPI_COMM_WORLD_RANK}"
WORLD_SIZE="${OMPI_COMM_WORLD_SIZE}"

# Default device list exported to child processes.
HIP_VISIBLE_DEVICES=0,1,2,3
export HIP_VISIBLE_DEVICES

# presumably the learning rate handed to main.py via APP — confirm where LR is used
LR='1e-5'
APP="python3 ../main.py \
......@@ -44,50 +44,35 @@ APP="python3 ../main.py \
# Per-rank launch. Dispatches on the node-local rank in ${lrank}; each arm
# exports HIP_VISIBLE_DEVICES, points UCX at one adapter port
# (UCX_NET_DEVICES=mlx5_<rank>:1, with a matching UCX_IB_PCI_BW entry), and
# then runs ${APP} under numactl with CPU and memory bound to one NUMA node.
# ${APP} is intentionally unquoted so the command string splits into words.
#
# The per-arm HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 replaces the 0,1,2,3 value
# exported earlier in the file for the launched process.
#
# NOTE(review): arms 1-7 contain two numactl lines, and the [7]) arm has a
# command sitting between two ';;' terminators. This looks like a rendered
# diff whose +/- markers were lost: presumably the first numactl line in each
# arm is the removed binding and the second is its replacement. Some of the
# export lines may also have been removed by the same commit. Confirm against
# the repository before treating this text as a runnable script.
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_0:1
export UCX_IB_PCI_BW=mlx5_0:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_1:1
export UCX_IB_PCI_BW=mlx5_1:50Gbs
# NOTE(review): presumably old binding (node 0) followed by new binding (node 1) — confirm.
numactl --cpunodebind=0 --membind=0 ${APP}
numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_2:1
export UCX_IB_PCI_BW=mlx5_2:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_3:1
export UCX_IB_PCI_BW=mlx5_3:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[4])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_4:1
export UCX_IB_PCI_BW=mlx5_4:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
numactl --cpunodebind=4 --membind=4 ${APP}
;;
[5])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_5:1
export UCX_IB_PCI_BW=mlx5_5:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
numactl --cpunodebind=5 --membind=5 ${APP}
;;
[6])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_6:1
export UCX_IB_PCI_BW=mlx5_6:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
numactl --cpunodebind=6 --membind=6 ${APP}
;;
[7])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_7:1
export UCX_IB_PCI_BW=mlx5_7:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
;;
# NOTE(review): the next two lines follow an arm terminator without a pattern;
# presumably they are the replacement for the two lines above — confirm.
numactl --cpunodebind=7 --membind=7 ${APP}
;;
esac
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment