Commit 9e574a55 authored by one
Browse files

[rccl-tests] Clean up mpirun_rccltest

parent 22868e9f
...@@ -28,7 +28,6 @@ hosts_raw= ...@@ -28,7 +28,6 @@ hosts_raw=
tcp_iface=p14p2 tcp_iface=p14p2
ssh_port=3333 ssh_port=3333
rccltest_runscript="${PWD}/run_rccltest"
rccltest_args=() rccltest_args=()
mpi_bin=/opt/mpi/bin/mpirun mpi_bin=/opt/mpi/bin/mpirun
ompi_prefix=/opt/mpi ompi_prefix=/opt/mpi
...@@ -97,7 +96,7 @@ if [ -z "${hosts_raw}" ]; then ...@@ -97,7 +96,7 @@ if [ -z "${hosts_raw}" ]; then
--mca coll ^hcoll \ --mca coll ^hcoll \
$(env | grep -E '^(NCCL|RCCL|UCX|HSA)_' | cut -d= -f1 | awk '{print "-x", $1}') \ $(env | grep -E '^(NCCL|RCCL|UCX|HSA)_' | cut -d= -f1 | awk '{print "-x", $1}') \
-np ${np} \ -np ${np} \
${rccltest_runscript} "${rccltest_args[@]}" "${rccltest_args[@]}"
else else
# Multi-node mode # Multi-node mode
echo "[WRAPPER] Running in multi-node mode." echo "[WRAPPER] Running in multi-node mode."
...@@ -133,7 +132,7 @@ else ...@@ -133,7 +132,7 @@ else
if [ ${#copyto_hosts[@]} -gt 0 ]; then if [ ${#copyto_hosts[@]} -gt 0 ]; then
echo "[WRAPPER] Copying files to remote nodes in parallel: ${copyto_hosts[*]}" echo "[WRAPPER] Copying files to remote nodes in parallel: ${copyto_hosts[*]}"
for node in "${copyto_hosts[@]}"; do for node in "${copyto_hosts[@]}"; do
rsync -azP -e "ssh -p ${ssh_port}" ${PWD}/build ${PWD}/scripts ${rccltest_runscript} ${NCCL_TOPO_FILE} ${NCCL_GRAPH_FILE} ${NCCL_TOPO_MAPPING_FILE} "${node}:${PWD}/" & rsync -azP -e "ssh -p ${ssh_port}" ${PWD}/build ${PWD}/scripts ${NCCL_TOPO_FILE} ${NCCL_GRAPH_FILE} ${NCCL_TOPO_MAPPING_FILE} "${node}:${PWD}/" &
rsync -azP -e "ssh -p ${ssh_port}" /opt/dtk/rccl/lib ${node}:/opt/dtk/rccl/ & rsync -azP -e "ssh -p ${ssh_port}" /opt/dtk/rccl/lib ${node}:/opt/dtk/rccl/ &
rsync -azP -e "ssh -p ${ssh_port}" /opt/mpi /opt/ucx ${node}:/opt/ & rsync -azP -e "ssh -p ${ssh_port}" /opt/mpi /opt/ucx ${node}:/opt/ &
done done
...@@ -148,7 +147,7 @@ else ...@@ -148,7 +147,7 @@ else
--mca btl_tcp_if_include ${tcp_iface} \ --mca btl_tcp_if_include ${tcp_iface} \
--mca plm_rsh_args "-p ${ssh_port}" \ --mca plm_rsh_args "-p ${ssh_port}" \
$(env | grep -E '^(NCCL|RCCL|UCX|HSA|HIP)_' | cut -d= -f1 | awk '{print "-x", $1}') \ $(env | grep -E '^(NCCL|RCCL|UCX|HSA|HIP)_' | cut -d= -f1 | awk '{print "-x", $1}') \
-x PATH -x LD_LIBRARY_PATH \ -x ROCM_PATH -x PATH -x LD_LIBRARY_PATH \
-np ${np} \ -np ${np} \
-H ${hosts_string} \ -H ${hosts_string} \
"${rccltest_args[@]}" "${rccltest_args[@]}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment