#!/bin/bash
#
# Runs the all_reduce_perf benchmark on two groups of four GPUs, pinning the
# ring layout either through NCCL_RINGS or through an NCCL_GRAPH_FILE.
#
# Must be started from a directory that contains ./build/all_reduce_perf and
# the graph-allreduce-4r-*.xml files referenced below (all paths are relative).
#
# Benchmark flags (per the nccl-tests / rccl-tests README):
#   -b 4    smallest message size in bytes
#   -e 16G  largest message size
#   -f 2    multiplication factor between successive sizes
#   -w 3    warm-up iterations
#   -n 3    measured iterations
#   -g 4    GPUs per thread
#
# NOTE: no `set -e` on purpose, so a failure in one benchmark run does not
# prevent the following, independent runs from executing.

# Settings shared by every run below; see the NCCL environment-variable
# documentation for the exact semantics of each value.
export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=1
export NCCL_ALGO=Ring
export NCCL_PROTO=Simple

# GPUs: 0,1,2,3
# The rings can be set directly with NCCL_RINGS.
HIP_VISIBLE_DEVICES=0,1,2,3 \
NCCL_RINGS="0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2" \
./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -n 3 -g 4

# They can also be set with NCCL_GRAPH_FILE.
HIP_VISIBLE_DEVICES=0,1,2,3 \
NCCL_GRAPH_FILE=graph-allreduce-4r-0123.xml \
./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -n 3 -g 4

# GPUs: 4,5,6,7
# For now these can only be set with NCCL_GRAPH_FILE.
# https://github.com/ROCm/rccl/pull/1301
HIP_VISIBLE_DEVICES=4,5,6,7 \
NCCL_GRAPH_FILE=graph-allreduce-4r-4567.xml \
./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -n 3 -g 4