"googlemock/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "25cc5777a17820a6339204a3552aa1dd5e428669"
Commit 8794da26 authored by one's avatar one
Browse files

Add graph xml for rccl-tests

parent 7e4fc401
<!-- Graph description covering GPU devices 0 to 0x3.
     Presumably the file the benchmark script loads as
     NCCL_GRAPH_FILE=graph-allreduce-4r-0123.xml (TODO confirm the file name). -->
<graphs version="1">
  <!-- Graph 0: six channels, each a distinct ordering of the four devices that starts at device 0.
       The orderings match the NCCL_RINGS value used by the benchmark script for GPUs 0,1,2,3. -->
  <graph id="0" pattern="4" crossnic="0" nchannels="6" speedintra="28" speedinter="28" latencyinter="0" typeintra="NVL" typeinter="PIX" samechannels="1">
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x1"/>
      <gpu dev="0x2"/>
      <gpu dev="0x3"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x3"/>
      <gpu dev="0x2"/>
      <gpu dev="0x1"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x1"/>
      <gpu dev="0x3"/>
      <gpu dev="0x2"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x2"/>
      <gpu dev="0x3"/>
      <gpu dev="0x1"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x2"/>
      <gpu dev="0x1"/>
      <gpu dev="0x3"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x3"/>
      <gpu dev="0x1"/>
      <gpu dev="0x2"/>
    </channel>
  </graph>
  <!-- Graph 1: same six device orderings as graph 0, with pattern="1". -->
  <graph id="1" pattern="1" crossnic="0" nchannels="6" speedintra="28" speedinter="28" latencyinter="0" typeintra="NVL" typeinter="PIX" samechannels="1">
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x1"/>
      <gpu dev="0x2"/>
      <gpu dev="0x3"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x3"/>
      <gpu dev="0x2"/>
      <gpu dev="0x1"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x1"/>
      <gpu dev="0x3"/>
      <gpu dev="0x2"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x2"/>
      <gpu dev="0x3"/>
      <gpu dev="0x1"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x2"/>
      <gpu dev="0x1"/>
      <gpu dev="0x3"/>
    </channel>
    <channel>
      <gpu dev="0"/>
      <gpu dev="0x3"/>
      <gpu dev="0x1"/>
      <gpu dev="0x2"/>
    </channel>
  </graph>
  <!-- Remaining graphs declare nchannels="0" and carry no channel entries.
       Two of them share id="2" with different patterns; kept exactly as found. -->
  <graph id="2" pattern="6" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
  <graph id="2" pattern="3" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
  <graph id="3" pattern="5" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
</graphs>
<!-- Graph description covering GPU devices 0x4 to 0x7.
     Presumably the file the benchmark script loads as
     NCCL_GRAPH_FILE=graph-allreduce-4r-4567.xml (TODO confirm the file name). -->
<graphs version="1">
  <!-- Graph 0: six channels, each a distinct ordering of the four devices that starts at device 0x4. -->
  <graph id="0" pattern="4" crossnic="0" nchannels="6" speedintra="36" speedinter="36" latencyinter="0" typeintra="NVL" typeinter="PIX" samechannels="1">
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x5"/>
      <gpu dev="0x6"/>
      <gpu dev="0x7"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x5"/>
      <gpu dev="0x7"/>
      <gpu dev="0x6"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x6"/>
      <gpu dev="0x7"/>
      <gpu dev="0x5"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x7"/>
      <gpu dev="0x6"/>
      <gpu dev="0x5"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x6"/>
      <gpu dev="0x5"/>
      <gpu dev="0x7"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x7"/>
      <gpu dev="0x5"/>
      <gpu dev="0x6"/>
    </channel>
  </graph>
  <!-- Graph 1: six channels with pattern="1"; unlike graph 0, the first four channels
       each begin at a different device (0x4, 0x5, 0x6, 0x7). -->
  <graph id="1" pattern="1" crossnic="0" nchannels="6" speedintra="42" speedinter="42" latencyinter="0" typeintra="NVL" typeinter="PIX" samechannels="1">
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x5"/>
      <gpu dev="0x6"/>
      <gpu dev="0x7"/>
    </channel>
    <channel>
      <gpu dev="0x5"/>
      <gpu dev="0x7"/>
      <gpu dev="0x4"/>
      <gpu dev="0x6"/>
    </channel>
    <channel>
      <gpu dev="0x6"/>
      <gpu dev="0x4"/>
      <gpu dev="0x7"/>
      <gpu dev="0x5"/>
    </channel>
    <channel>
      <gpu dev="0x7"/>
      <gpu dev="0x6"/>
      <gpu dev="0x5"/>
      <gpu dev="0x4"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x6"/>
      <gpu dev="0x5"/>
      <gpu dev="0x7"/>
    </channel>
    <channel>
      <gpu dev="0x4"/>
      <gpu dev="0x7"/>
      <gpu dev="0x5"/>
      <gpu dev="0x6"/>
    </channel>
  </graph>
  <!-- Remaining graphs declare nchannels="0" and carry no channel entries.
       Two of them share id="2" with different patterns; kept exactly as found. -->
  <graph id="2" pattern="6" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
  <graph id="2" pattern="3" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
  <graph id="3" pattern="5" crossnic="0" nchannels="0" speedintra="0" speedinter="0" latencyinter="0" typeintra="LOC" typeinter="LOC" samechannels="0"/>
</graphs>
#!/bin/bash
# Runs the all_reduce_perf benchmark three times: twice on GPUs 0-3 (once with
# NCCL_RINGS, once with a graph file) and once on GPUs 4-7 (graph file only).
# Paths are relative, so the script expects to be started from the directory
# that contains ./build and the graph XML files.

# Settings exported to every benchmark run below.
export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=1
export NCCL_ALGO=Ring
export NCCL_PROTO=Simple

# Benchmark command line shared by all three runs.
perf_cmd=(./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -n 3 -g 4)

# GPUs: 0,1,2,3
# The rings can be set directly with NCCL_RINGS.
rings_0123="0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
HIP_VISIBLE_DEVICES=0,1,2,3 NCCL_RINGS="$rings_0123" "${perf_cmd[@]}"

# They can also be set with NCCL_GRAPH_FILE.
HIP_VISIBLE_DEVICES=0,1,2,3 NCCL_GRAPH_FILE=graph-allreduce-4r-0123.xml "${perf_cmd[@]}"

# GPUs: 4,5,6,7
# For now these can only be set with NCCL_GRAPH_FILE.
# https://github.com/ROCm/rccl/pull/1301
HIP_VISIBLE_DEVICES=4,5,6,7 NCCL_GRAPH_FILE=graph-allreduce-4r-4567.xml "${perf_cmd[@]}"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment