Unverified Commit 0286389b authored by one's avatar one Committed by GitHub
Browse files

[onebenchmark] Add ansible files for rccl tests (#4)

parent 9e574a55
# Top-level playbook: first bring up the container cluster (start-cluster.yml),
# then run the RCCL tests inside it (run-tests.yml).
- import_playbook: start-cluster.yml
- import_playbook: run-tests.yml
#!/bin/bash
# Helper that wraps mpirun for launching RCCL test binaries, either on the
# local node only or across the nodes listed with -H/--hosts.
# Every argument that is not one of the wrapper's own options is forwarded
# unchanged to mpirun as the test command line.
# NOTE(review): presumably this is the file installed as
# /usr/local/bin/mpirun_rccltest by the run-tests playbook - confirm.
#
# Abort as soon as any command exits with a non-zero status.
set -e
# =================================================
# Helper functions
# =================================================
# Print the usage text of this wrapper on stdout.
# Globals:
#   tcp_iface (read) - shown as the default of --tcp-iface
#   ssh_port  (read) - shown as the default of --ssh-port
# Both globals are read when the function is called, not when it is defined.
help() {
  local -a usage_lines=(
    "RCCL Tests MPI run helper script"
    "Usage: $(basename "$0") [OPTIONS]"
    "OPTIONS:"
    "-h, --help Show this help message and exit"
    "-np Total number of processes (default: sum of per-node counts in --hosts)"
    "-H, --hosts Comma-separated list of nodes with optional process count per node"
    "Format: node01:8,node02:8"
    "If count is omitted, falls back to auto-detected GPU count per node."
    "--tcp-iface TCP interface to use for communication (default: ${tcp_iface})"
    "--ssh-port SSH port to use for remote connections (default: ${ssh_port})"
  )
  # One line per array element, each terminated by a newline.
  printf '%s\n' "${usage_lines[@]}"
}
# =================================================
# Global variables
# =================================================
np=                          # total process count; empty until -np is parsed or a default is derived
hosts_raw=                   # raw value of -H/--hosts; empty selects single-node mode
tcp_iface=p14p2              # default for --tcp-iface
ssh_port=3333                # default for --ssh-port
rccltest_args=()             # everything that is not a wrapper option; forwarded to mpirun
mpi_bin=/opt/mpi/bin/mpirun
ompi_prefix=/opt/mpi         # passed to mpirun --prefix in multi-node mode

# Print the number of "Device ID" lines reported by `hy-smi --showid`.
# Prints 0 when hy-smi is missing or reports no devices.
detect_gpu_count() {
  # grep -c exits with status 1 when it counts zero matches. Without the
  # '|| true' guard that status would abort the script under `set -e`
  # before the fallback below could run.
  hy-smi --showid 2>/dev/null | grep -ic "Device ID" || true
}

# Detect the number of GPUs per node (used as fallback when count is not specified in --hosts)
ngpu_per_node=$(detect_gpu_count)
if [[ -z "${ngpu_per_node}" || "${ngpu_per_node}" -eq 0 ]]; then
  echo "[WRAPPER] Failed to get the number of GPUs per node via hy-smi. Defaulting to 8."
  ngpu_per_node=8
else
  echo "[WRAPPER] Detected ${ngpu_per_node} GPUs per node."
fi
# =================================================
# Parameter parsing
# =================================================
# Exit with status 2 and a message on stderr when an option that needs a
# value is the last word on the command line.
# Arguments: $1 - option name, $2 - number of words left (including the option)
require_value() {
  if (( $2 < 2 )); then
    echo "[WRAPPER] Option '$1' requires a value." >&2
    exit 2
  fi
}

# Consume the wrapper's own options and collect everything else.
# Globals written: np, hosts_raw, tcp_iface, ssh_port, rccltest_args
# Arguments: the script's command line ("$@")
# Words that are not wrapper options are appended to rccltest_args in order;
# after a literal `--` all remaining words are appended without inspection.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "${1}" in
      -h|--help)
        help
        exit 0
        ;;
      -np)
        require_value "${1}" "$#"
        np=${2}
        shift 2
        ;;
      -H|--hosts)
        require_value "${1}" "$#"
        hosts_raw=${2}
        shift 2
        ;;
      --tcp-iface)
        require_value "${1}" "$#"
        tcp_iface=${2}
        shift 2
        ;;
      --ssh-port)
        require_value "${1}" "$#"
        ssh_port=${2}
        shift 2
        ;;
      --)
        shift
        rccltest_args+=("$@")
        break
        ;;
      *)
        rccltest_args+=("${1}")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# =================================================
# Parse hosts into parallel arrays: node_names[], node_slots[]
# Input format: node01:8,node02:8 (count optional, falls back to ngpu_per_node)
# =================================================
# Split the comma-separated --hosts value into two parallel global arrays.
# Globals:
#   hosts_raw     (read)    - e.g. "node01:8,node02" (count after ':' is optional)
#   ngpu_per_node (read)    - slot count used when an entry has no count
#   node_names    (written) - host name of each entry
#   node_slots    (written) - slot count of each entry, as a decimal integer
# Returns:
#   0 on success; 1 (with a message on stderr) when an entry has an empty
#   host name or a count that is not a non-negative integer.
parse_hosts() {
  node_names=()
  node_slots=()
  local -a entries=()
  local entry name slots
  IFS=',' read -ra entries <<< "${hosts_raw}"
  for entry in "${entries[@]}"; do
    # Tolerate stray commas such as "node01,,node02".
    [[ -n "${entry}" ]] || continue
    name="${entry%%:*}"
    slots=""
    if [[ "${entry}" == *:* ]]; then
      slots="${entry##*:}"
    fi
    # No ':' at all, or nothing after it ("node01:"): use the detected count.
    slots="${slots:-${ngpu_per_node}}"
    if [[ -z "${name}" || ! "${slots}" =~ ^[0-9]+$ ]]; then
      echo "[WRAPPER] Invalid host entry '${entry}' (expected NAME or NAME:COUNT)." >&2
      return 1
    fi
    node_names+=("${name}")
    # Force base 10 so a count like "08" is not read as octal in later arithmetic.
    node_slots+=("$(( 10#${slots} ))")
  done
}
# =================================================
# Run rccl test script
# =================================================
# Launch mpirun. With no --hosts value everything runs on the local node;
# otherwise the host list is parsed and handed to mpirun via -H.
# In both modes the names of matching environment variables are forwarded
# with `-x NAME`, followed by the collected test command line.
if [[ -z "${hosts_raw}" ]]; then
  # Run single-node test if --hosts is not set
  echo "[WRAPPER] No compute nodes specified. Running in single-node mode."
  # Default np to ngpu_per_node when not set
  np="${np:-${ngpu_per_node}}"
  echo "Using np=${np}"
  # The $(env ...) substitution is left unquoted on purpose: it expands to
  # a flat list of "-x NAME" words, one pair per matching variable.
  # shellcheck disable=SC2046
  "${mpi_bin}" --allow-run-as-root \
    --bind-to none \
    --mca pml ucx \
    --mca osc ucx \
    --mca btl ^vader,tcp,openib,uct \
    --mca coll ^hcoll \
    $(env | grep -E '^(NCCL|RCCL|UCX|HSA)_' | cut -d= -f1 | awk '{print "-x", $1}') \
    -np "${np}" \
    "${rccltest_args[@]}"
else
  # Multi-node mode
  echo "[WRAPPER] Running in multi-node mode."
  parse_hosts
  # Build MPI -H string and auto-sum np
  hosts_string=""
  np_sum=0
  for i in "${!node_names[@]}"; do
    hosts_string+="${node_names[$i]}:${node_slots[$i]},"
    # Plain assignment instead of (( ... )): an arithmetic command returns
    # status 1 when its result is 0, which would abort the script under set -e.
    np_sum=$(( np_sum + node_slots[i] ))
  done
  # Drop the trailing comma left by the loop.
  hosts_string="${hosts_string%,}"
  # -np overrides auto-sum if explicitly provided
  np="${np:-${np_sum}}"
  echo "[WRAPPER] MPI hosts: ${hosts_string}"
  echo "[WRAPPER] Total processes (np): ${np}"
  echo "[WRAPPER] Using TCP interface: ${tcp_iface}"
  echo "[WRAPPER] Using SSH port: ${ssh_port}"
  # The $(env ...) substitution is left unquoted on purpose (see above).
  # shellcheck disable=SC2046
  "${mpi_bin}" --allow-run-as-root \
    --prefix "${ompi_prefix}" \
    --bind-to none \
    --mca pml ucx \
    --mca btl_tcp_if_include "${tcp_iface}" \
    --mca plm_rsh_args "-p ${ssh_port}" \
    $(env | grep -E '^(NCCL|RCCL|UCX|HSA|HIP)_' | cut -d= -f1 | awk '{print "-x", $1}') \
    -x ROCM_PATH -x PATH -x LD_LIBRARY_PATH \
    -np "${np}" \
    -H "${hosts_string}" \
    "${rccltest_args[@]}"
fi
#!/bin/bash
# all_reduce_perf sweep through the mpirun_rccltest wrapper: 2, 4 and 8
# processes on the local node, then 16 processes on two nodes and 32 on four.
# Environment:
#   SSH_PORT (optional) - forwarded as --ssh-port for the multi-node runs;
#                         when unset the wrapper's own default port is used.
set -e
unset UCX_HOME
# export UCX_LOG_LEVEL=fatal
export NCCL_TOPO_DUMP_FILE=${PWD}/topo-generated.xml
export NCCL_GRAPH_DUMP_FILE=${PWD}/graph-generated.xml
# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_SUBSYS=ALL
#export RCCL_SDMA_COPY_ENABLE=1
#export RCCL_SDMA_LINK_MODE=0
# PCIe hybrid links
# export NCCL_SIMPLE_CHANNELS=32
# export RCCL_P2P_XHCL_CHANNEL_NUM=31
# export RCCL_COLL_XHCL_CHANNEL_NUM=28
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_SOCKET_IFNAME=p14p2
export NCCL_IB_HCA="=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10"
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_GDR_READ=1
# export NCCL_ALGO=Ring
# export NCCL_PROTO=Simple
export NCCL_SIMPLE_CHANNELS=32
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of topo / topo mapping is needed
# export NCCL_GRAPH_FILE=${PWD}/graph-16r-allreduce.xml

# Pass --ssh-port only when SSH_PORT is set. An unquoted empty ${SSH_PORT}
# would make the wrapper take the benchmark name as the port value.
ssh_port_args=()
if [[ -n "${SSH_PORT:-}" ]]; then
  ssh_port_args=(--ssh-port "${SSH_PORT}")
fi

# Single-node runs.
for nprocs in 2 4 8; do
  mpirun_rccltest -np "${nprocs}" \
    all_reduce_perf -b 4 -e 16G -f 2 -w 3 -g 1
done

# Multi-node runs.
mpirun_rccltest -np 16 -H node01,node02 "${ssh_port_args[@]}" \
  all_reduce_perf -b 4 -e 16G -f 2 -w 3 -g 1
mpirun_rccltest -np 32 -H node01,node02,node03,node04 "${ssh_port_args[@]}" \
  all_reduce_perf -b 4 -e 16G -f 2 -w 3 -g 1
#!/bin/bash
# alltoall_perf sweep through the mpirun_rccltest wrapper: 2, 4 and 8
# processes on the local node, then 16 processes on two nodes and 32 on four.
# The minimum message size (-b) doubles with each step of the sweep.
# Environment:
#   SSH_PORT (optional) - forwarded as --ssh-port for the multi-node runs;
#                         when unset the wrapper's own default port is used.
set -e
unset UCX_HOME
# export UCX_LOG_LEVEL=fatal
export NCCL_TOPO_DUMP_FILE=${PWD}/topo-generated.xml
export NCCL_GRAPH_DUMP_FILE=${PWD}/graph-generated.xml
# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_SUBSYS=ALL
#export RCCL_SDMA_COPY_ENABLE=1
#export RCCL_SDMA_LINK_MODE=0
# PCIe hybrid links
# export NCCL_SIMPLE_CHANNELS=32
# export RCCL_P2P_XHCL_CHANNEL_NUM=31
# export RCCL_COLL_XHCL_CHANNEL_NUM=28
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_SOCKET_IFNAME=p14p2
export NCCL_IB_HCA="=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10"
export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=PHB
export NCCL_NET_GDR_READ=1
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of topo / topo mapping is needed

# Pass --ssh-port only when SSH_PORT is set. An unquoted empty ${SSH_PORT}
# would make the wrapper take the benchmark name as the port value.
ssh_port_args=()
if [[ -n "${SSH_PORT:-}" ]]; then
  ssh_port_args=(--ssh-port "${SSH_PORT}")
fi

# Single-node runs.
mpirun_rccltest -np 2 \
  alltoall_perf -b 32 -e 16G -f 2 -w 3 -g 1
mpirun_rccltest -np 4 \
  alltoall_perf -b 64 -e 16G -f 2 -w 3 -g 1
mpirun_rccltest -np 8 \
  alltoall_perf -b 128 -e 16G -f 2 -w 3 -g 1

# Multi-node runs.
mpirun_rccltest -np 16 -H node01,node02 "${ssh_port_args[@]}" \
  alltoall_perf -b 256 -e 16G -f 2 -w 3 -g 1
mpirun_rccltest -np 32 -H node01,node02,node03,node04 "${ssh_port_args[@]}" \
  alltoall_perf -b 512 -e 16G -f 2 -w 3 -g 1
#!/bin/bash
# sendrecv_perf between node01 and node02, one process per node, repeated
# once for each value 0..7 of HIP_VISIBLE_DEVICES.
# Environment:
#   SSH_PORT (optional) - forwarded as --ssh-port to the wrapper;
#                         when unset the wrapper's own default port is used.
set -e
unset UCX_HOME
export NCCL_TOPO_DUMP_FILE=${PWD}/topo-generated.xml
export NCCL_GRAPH_DUMP_FILE=${PWD}/graph-generated.xml
# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_SUBSYS=ALL
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_SOCKET_IFNAME=p14p2
export NCCL_IB_HCA="=mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_7,mlx5_8,mlx5_9,mlx5_10"
export NCCL_P2P_LEVEL=SYS
export NCCL_NET_GDR_LEVEL=PHB
export NCCL_NET_GDR_READ=1
unset NCCL_NCHANNELS_PER_PEER
export NCCL_TOPO_MAPPING_FILE=${PWD}/topo-mapping-bw1000.xml
# export NCCL_TOPO_FILE=${PWD}/topo-gdr-bw1000.xml # only one of topo / topo mapping is needed

# Pass --ssh-port only when SSH_PORT is set. An unquoted empty ${SSH_PORT}
# would make the wrapper take the benchmark name as the port value.
ssh_port_args=()
if [[ -n "${SSH_PORT:-}" ]]; then
  ssh_port_args=(--ssh-port "${SSH_PORT}")
fi

for g in {0..7}; do
  echo
  echo "Running with GPU ${g}"
  # Exported so that it is part of the environment the wrapper runs in.
  export HIP_VISIBLE_DEVICES=${g}
  mpirun_rccltest -np 2 -H node01:1,node02:1 "${ssh_port_args[@]}" \
    sendrecv_perf -b 2G -e 2G -f 2 -w 3 -g 1
  echo
done
<system version="2">
<group name="gfx936_8_x86_64_HygonGenuine_mlx5_11_InfiniBand_40-200-200-200-200-40-5-200-200-200-200_1_8_1">
<cpu numaid="3">
<pci>
<nic id="mlx5_1"/>
<nic id="mlx5_2"/>
<gpu dev="0"/>
<gpu dev="1"/>
</pci>
</cpu>
<cpu numaid="0">
<pci>
<nic id="mlx5_3"/>
<nic id="mlx5_4"/>
<gpu dev="2"/>
<gpu dev="3"/>
</pci>
</cpu>
<cpu numaid="7">
<pci>
<nic id="mlx5_7"/>
<nic id="mlx5_8"/>
<gpu dev="4"/>
<gpu dev="5"/>
</pci>
</cpu>
<cpu numaid="4">
<pci>
<nic id="mlx5_9"/>
<nic id="mlx5_10"/>
<gpu dev="6"/>
<gpu dev="7"/>
</pci>
</cpu>
</group>
</system>
# Play: stage the rccl-tests helper files on every host, copy them into the
# benchmark container, run the selected test script once, and show its output.
# Variables (container_name, work_dir, ssh_port, test_script) come from vars.yml.
- name: Run RCCL Tests
hosts: all
become: yes
any_errors_fatal: true
vars_files:
- vars.yml
tasks:
# Staging directory on the remote host, namespaced by container name.
- name: Create temp dir on remote host
ansible.builtin.file:
path: /tmp/{{ container_name }}/rccl-tests
state: directory
# Copy the rccl-tests/ directory next to this playbook into the staging directory.
- name: Sync files to remote host
ansible.posix.synchronize:
src: "{{ playbook_dir }}/rccl-tests/"
dest: /tmp/{{ container_name }}/rccl-tests/
# Copy the staged files into work_dir inside the container, and install the
# mpirun_rccltest wrapper as an executable in /usr/local/bin.
- name: Copy files into container
ansible.builtin.shell: |
docker cp /tmp/{{ container_name }}/rccl-tests/. {{ container_name }}:{{ work_dir }}
docker cp /tmp/{{ container_name }}/rccl-tests/mpirun_rccltest {{ container_name }}:/usr/local/bin/mpirun_rccltest
docker exec {{ container_name }} chmod +x /usr/local/bin/mpirun_rccltest
# Run test_script with bash inside the container, with SSH_PORT set from
# ssh_port and stderr merged into stdout. Executed once, delegated to the
# first host of the 'all' group; the result is registered as `result`.
- name: Run tests
ansible.builtin.shell: |
docker exec -e SSH_PORT={{ ssh_port | string }} \
-w {{ work_dir }} \
{{ container_name }} \
bash {{ work_dir }}/{{ test_script }} 2>&1
delegate_to: "{{ groups['all'][0] }}"
run_once: true
register: result
# Print the captured stdout of the test run, line by line.
- name: Show output
ansible.builtin.debug:
msg: "{{ result.stdout_lines }}"
run_once: true
# Remove the whole per-container staging directory from the remote host.
# NOTE(review): this is an ordinary task, so it is presumably skipped when an
# earlier task fails - confirm whether cleanup on failure is wanted.
- name: Cleanup temp dir
ansible.builtin.file:
path: /tmp/{{ container_name }}
state: absent
# Play: start the benchmark container on every host and make sure the
# rccl-tests binaries are built and installed inside it.
# Variables (container_name, image_name, ssh_port, gpu_target) come from
# vars.yml; force_rm and force_reinstall are optional and default to false.
- name: Start docker cluster
hosts: all
become: yes
any_errors_fatal: true
vars_files:
- vars.yml
vars:
rccl_tests_install_dir: /workspace/rccl-tests
tasks:
# Start the container with host networking and host IPC, privileged.
# etc_hosts maps every inventory host name to its default IPv4 address taken
# from gathered facts. The container command starts sshd on ssh_port and
# then runs `sleep infinity`. The container is recreated only when force_rm
# is true.
- name: Run cluster container
community.docker.docker_container:
name: "{{ container_name }}"
image: "{{ image_name }}"
recreate: "{{ force_rm | default(false) | bool }}"
state: started
hostname: "{{ inventory_hostname }}"
network_mode: host
ipc_mode: host
privileged: yes
shm_size: 512G
volumes:
- /opt/hyhal:/opt/hyhal:ro
- /root/.ssh:/root/.ssh
working_dir: /workspace
etc_hosts: "{{ dict(groups['all'] | zip(groups['all'] | map('extract', hostvars, ['ansible_facts', 'default_ipv4', 'address']))) }}"
command: >
bash -c "mkdir -p /run/sshd && /usr/sbin/sshd -p {{ ssh_port }}; sleep infinity"
# Probe for an installed all_reduce_perf binary. The task never fails and
# never reports a change; only its return code is used by the next task.
- name: Check if rccl-tests is already installed
community.docker.docker_container_exec:
container: "{{ container_name }}"
command: test -f /usr/local/bin/all_reduce_perf
register: rccl_installed
failed_when: false
changed_when: false
# Build and install only when the probe failed or force_reinstall is true.
- name: Install rccl-tests
when: rccl_installed.rc != 0 or (force_reinstall | default(false) | bool)
block:
# Remove any previous checkout, then clone the master branch.
- name: Clone rccl-tests
community.docker.docker_container_exec:
container: "{{ container_name }}"
command: >
bash -c "
rm -rf {{ rccl_tests_install_dir }} &&
git clone https://github.com/ROCm/rccl-tests.git -b master {{ rccl_tests_install_dir }}
"
# Symlink hipify-perl into /opt/dtk/bin, then run install.sh with MPI
# support for the GPU target given by gpu_target.
- name: Build rccl-tests
community.docker.docker_container_exec:
container: "{{ container_name }}"
command: >
bash -c "
cd {{ rccl_tests_install_dir }} &&
ln -sf $(which hipify-perl) /opt/dtk/bin/hipify-perl &&
./install.sh --mpi --mpi_home /opt/mpi \
--rocm_home /opt/dtk \
--rccl_home /opt/dtk/rccl \
--hip_compiler hipcc \
--gpu_targets {{ gpu_target }}
"
# Copy every build/*_perf binary into /usr/local/bin and mark it executable.
- name: Copy rccl-tests binaries to global PATH
community.docker.docker_container_exec:
container: "{{ container_name }}"
command: >
bash -c "
cp {{ rccl_tests_install_dir }}/build/*_perf /usr/local/bin/ &&
chmod +x /usr/local/bin/*_perf
"
# Smoke test: the binary must be callable by name inside the container.
- name: Verify rccl-tests installation
community.docker.docker_container_exec:
container: "{{ container_name }}"
command: all_reduce_perf --help
changed_when: false
# Shared variables loaded by the start-cluster and run-tests playbooks.
# Docker image the benchmark container is started from.
# image_name: harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.11.0-ubuntu22.04-dtk26.04-0130-py3.10-20260204
image_name: onebenchmark-dtk26.04
# Name of the container created on every host; also used to name the
# staging directory under /tmp on the host.
container_name: benchmark-dtk26-0316
# Port sshd is started on inside the container; also handed to the test
# script as the SSH_PORT environment variable.
ssh_port: 3433
# Value passed to the rccl-tests install.sh as --gpu_targets.
gpu_target: gfx936
# Directory inside the container that the test files are copied to and that
# the test script is run from.
work_dir: /workspace/rccl-tests
# Script under work_dir that the "Run tests" task executes with bash.
test_script: run-sendrecv.sh
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment