run-allreduce.sh 747 Bytes
Newer Older
one's avatar
one committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/bin/bash

# Runs the all_reduce_perf benchmark three times, each time on a group of four
# GPUs, selecting the communication topology through either NCCL_RINGS or
# NCCL_GRAPH_FILE. The benchmark binary is referenced relative to the current
# working directory (./build/all_reduce_perf).
# NOTE(review): the graph XML files are named without a directory, so they are
# presumably looked up relative to the working directory as well — confirm.

# Settings shared by every run; exported so each benchmark process inherits them.
export NCCL_P2P_LEVEL=SYS \
       NCCL_NET_GDR_LEVEL=SYS \
       NCCL_NET_GDR_READ=1 \
       NCCL_ALGO=Ring \
       NCCL_PROTO=Simple

# Benchmark command line; it is the same for all three runs.
allreduce_cmd=(./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -n 3 -g 4)

# Runs the benchmark once.
#   $1 - value for HIP_VISIBLE_DEVICES
#   $2 - a single NAME=value assignment that selects the topology
# Both variables are exported inside a subshell only, so they apply to this one
# run and never leak into later runs. The exit status is the benchmark's own.
run_allreduce() {
  (
    export HIP_VISIBLE_DEVICES="$1"
    export "$2"
    "${allreduce_cmd[@]}"
  )
}

# GPUs: 0,1,2,3
# The rings can be set directly with NCCL_RINGS.
run_allreduce 0,1,2,3 "NCCL_RINGS=0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"

# They can also be set with NCCL_GRAPH_FILE.
run_allreduce 0,1,2,3 NCCL_GRAPH_FILE=graph-allreduce-4r-0123.xml

# GPUs: 4,5,6,7
# For now this group can only be configured with NCCL_GRAPH_FILE.
# https://github.com/ROCm/rccl/pull/1301
run_allreduce 4,5,6,7 NCCL_GRAPH_FILE=graph-allreduce-4r-4567.xml