Commit abad43bf authored by one's avatar one
Browse files

[rccl-tests] Add topo mapping file

parent 8b3fbed1
......@@ -133,9 +133,9 @@ else
if [ ${#copyto_hosts[@]} -gt 0 ]; then
echo "[WRAPPER] Copying files to remote nodes in parallel: ${copyto_hosts[*]}"
for node in "${copyto_hosts[@]}"; do
rsync -az -e "ssh -p ${ssh_port}" ${PWD}/build ${PWD}/scripts ${rccltest_runscript} ${NCCL_TOPO_FILE} ${NCCL_GRAPH_FILE} "${node}:${PWD}/" &
rsync -az -e "ssh -p ${ssh_port}" /opt/dtk/rccl/lib ${node}:/opt/dtk/rccl/ &
rsync -az -e "ssh -p ${ssh_port}" /opt/mpi /opt/ucx ${node}:/opt/ &
rsync -azP -e "ssh -p ${ssh_port}" ${PWD}/build ${PWD}/scripts ${rccltest_runscript} ${NCCL_TOPO_FILE} ${NCCL_GRAPH_FILE} ${NCCL_TOPO_MAPPING_FILE} "${node}:${PWD}/" &
rsync -azP -e "ssh -p ${ssh_port}" /opt/dtk/rccl/lib ${node}:/opt/dtk/rccl/ &
rsync -azP -e "ssh -p ${ssh_port}" /opt/mpi /opt/ucx ${node}:/opt/ &
done
wait
echo "[WRAPPER] Files synchronized successfully."
......
......@@ -28,7 +28,8 @@ export NCCL_NET_GDR_READ=1
# export NCCL_PROTO=Simple
export NCCL_SIMPLE_CHANNELS=32
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of the topo file and the topo mapping file needs to be set
# export NCCL_GRAPH_FILE=${PWD}/graph-16r-allreduce.xml
./mpirun_rccltest -np 2 \
./build/all_reduce_perf -b 4 -e 16G -f 2 -w 3 -g 1
......
......@@ -26,7 +26,8 @@ export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=PHB
export NCCL_NET_GDR_READ=1
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of the topo file and the topo mapping file needs to be set
./mpirun_rccltest -np 2 \
./build/alltoall_perf -b 32 -e 16G -f 2 -w 3 -g 1
./mpirun_rccltest -np 4 \
......
......@@ -16,7 +16,8 @@ export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=PHB
export NCCL_NET_GDR_READ=1
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of the topo file and the topo mapping file needs to be set
for g in {0..7}; do
echo
......
<!--
  Topology mapping description.
  Structure: one <group> holding four <cpu> entries (numaid 3, 0, 7, 4, in
  that order). Each <cpu> has a single <pci> element listing two NICs and
  two GPUs. GPU dev indices 0..7 each appear exactly once. The NICs listed
  are mlx5_1..mlx5_4 and mlx5_7..mlx5_10; mlx5_0, mlx5_5 and mlx5_6 do not
  appear in any entry.
  NOTE(review): presumably this is the file the run scripts point at via
  NCCL_TOPO_MAPPING_FILE; confirm the file name against those scripts.
  NOTE(review): whether co-listing under one <pci> means "same PCI switch"
  or only "preferred NIC/GPU pairing" is not visible here; confirm with the
  consumer of this file.
-->
<system version="2">
<!-- NOTE(review): the group name looks like a hardware signature (GPU arch,
     CPU arch/vendor, NIC driver/link type, and several numeric fields);
     how the consumer matches it is not shown here. TODO confirm before
     editing the name. -->
<group name="gfx936_8_x86_64_HygonGenuine_mlx5_11_InfiniBand_40-200-200-200-200-40-5-200-200-200-200_1_8_1">
<!-- NUMA id 3: NICs mlx5_1, mlx5_2 listed with GPUs 0, 1 -->
<cpu numaid="3">
<pci>
<nic id="mlx5_1"/>
<nic id="mlx5_2"/>
<gpu dev="0"/>
<gpu dev="1"/>
</pci>
</cpu>
<!-- NUMA id 0: NICs mlx5_3, mlx5_4 listed with GPUs 2, 3 -->
<cpu numaid="0">
<pci>
<nic id="mlx5_3"/>
<nic id="mlx5_4"/>
<gpu dev="2"/>
<gpu dev="3"/>
</pci>
</cpu>
<!-- NUMA id 7: NICs mlx5_7, mlx5_8 listed with GPUs 4, 5 -->
<cpu numaid="7">
<pci>
<nic id="mlx5_7"/>
<nic id="mlx5_8"/>
<gpu dev="4"/>
<gpu dev="5"/>
</pci>
</cpu>
<!-- NUMA id 4: NICs mlx5_9, mlx5_10 listed with GPUs 6, 7 -->
<cpu numaid="4">
<pci>
<nic id="mlx5_9"/>
<nic id="mlx5_10"/>
<gpu dev="6"/>
<gpu dev="7"/>
</pci>
</cpu>
</group>
</system>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment