run-sendrecv.sh 824 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
# Environment setup for the sendrecv benchmark sweep that follows.
# Abort on the first command that fails.
set -o errexit

# Drop settings that may be inherited from the calling shell.
unset UCX_HOME NCCL_NCHANNELS_PER_PEER

# Dump files are placed in the directory the script is started from.
export NCCL_TOPO_DUMP_FILE="${PWD}/topo-generated.xml" \
       NCCL_GRAPH_DUMP_FILE="${PWD}/graph-generated.xml"

# Uncomment for verbose NCCL logging.
# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_SUBSYS=ALL

export HSA_FORCE_FINE_GRAIN_PCIE=1

# Network selection: socket interface and the list of InfiniBand HCAs.
# NOTE(review): the leading "=" in NCCL_IB_HCA is understood to request
# exact-name matching -- confirm against the NCCL version in use.
export NCCL_SOCKET_IFNAME='p14p2'
export NCCL_IB_HCA='=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10'

# Peer-to-peer and GPUDirect RDMA tuning.
export NCCL_P2P_LEVEL='SYS' \
       NCCL_NET_GDR_LEVEL='PHB' \
       NCCL_NET_GDR_READ='1'

# Topology mapping file, also resolved against the start directory.
export NCCL_TOPO_MAPPING_FILE="${PWD}/topo-mapping-bw1000.xml"
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # topo and topo mapping: only one of the two is needed

# Run the sendrecv benchmark once per GPU index 0-7. Each iteration exposes a
# single GPU through HIP_VISIBLE_DEVICES and launches sendrecv_perf with two
# ranks, one slot on node01 and one on node02.
#
# Required environment:
#   SSH_PORT - value passed to the launcher's --ssh-port option.

# Fail fast when SSH_PORT is unset or empty. Otherwise the expansion would
# vanish from the command line and --ssh-port would take "sendrecv_perf" as
# its value.
: "${SSH_PORT:?SSH_PORT must be set (value for --ssh-port)}"

for g in {0..7}; do
  echo
  echo "Running with GPU ${g}"
  export HIP_VISIBLE_DEVICES="${g}"
  # NOTE(review): per nccl-tests conventions, -b/-e fix the message size at
  # 2G, -f is the size step factor, -w the warm-up iteration count and -g the
  # GPUs per thread -- confirm against the installed rccl-tests build.
  mpirun_rccltest -np 2 -H node01:1,node02:1 --ssh-port "${SSH_PORT}" \
    sendrecv_perf -b 2G -e 2G -f 2 -w 3 -g 1
  echo
done