#!/usr/bin/env bash
# Author: Noel Chalmers
#
# rocHPL MPI run helper.
#
# Parses the command line, then launches run_rochpl through mpirun:
#   * without -H/--hosts: a single-node run on the local machine;
#   * with    -H/--hosts: build/, tpl/ and (when used) the input file are
#     rsync'ed from the current directory to /workspace/ on every listed node
#     other than $(hostname), then a multi-node run is started.
#
# The script's exit status is mpirun's exit status unless one of the checks
# below aborts first (see "Pre-requisites check" for the exit codes).

# set -x #echo on

# #################################################
# helper functions
# #################################################

# Print the usage text to stdout.
function display_help()
{
  cat <<'EOF'
rocHPL MPI run helper script
./mpirun_rochpl
    [-P]            Specific MPI grid size: the number of
                    rows in MPI grid.
    [-Q]            Specific MPI grid size: the number of
                    columns in MPI grid.
    [-p]            Specific node-local MPI grid size: the number
                    of rows in node-local MPI grid. Must evenly
                    divide P.
    [-q]            Specific node-local MPI grid size: the number
                    of columns in node-local MPI grid. Must evenly
                    divide Q.
    [-N]            Specific matrix size: the number of
                    rows/columns in global matrix.
    [--NB]          Specific panel size: the number of
                    rows/columns in panels.
    [--it]          Iterations: the number of times to run each
                    problem size.
    [-f]            Specific split fraction: the percentage to
                    split the trailing submatrix.
    [-i]            Input file. When set, all other command
                    line parameters are ignored, and problem
                    parameters are read from input file.
    [--devices]     Value forwarded to run_rochpl as --devices=<value>.
    [-H|--hosts]    Comma-separated list of nodes to run on.
    [--tcp-iface]   TCP interface to use for communication.
    [--port]        SSH port to use for remote connections.
    [-h|--help]     prints this help message
    [--version]     Print rocHPL version number.
EOF
}

# Exit with status 2 unless ${ID} (from /etc/os-release) names a distro this
# script accepts; return success otherwise.
# prereq: ${ID} must be defined before calling
supported_distro( )
{
  if [[ -z "${ID+foo}" ]]; then
    printf "supported_distro(): \$ID must be set\n" >&2
    exit 2
  fi

  case "${ID}" in
    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
      true
      ;;
    *)
      printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky\n" >&2
      exit 2
      ;;
  esac
}

# Succeed when $1 is a decimal integer greater than zero.
# The 10# prefix keeps values such as "08" from being parsed as octal.
is_positive_int()
{
  [[ "${1}" =~ ^[0-9]+$ ]] && (( 10#${1} > 0 ))
}

# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: problems with getopt / invalid command line parameters
# Exit code 2: problems with supported platforms

# check if getopt command is installed
if ! command -v getopt > /dev/null 2>&1; then
  echo "This script uses getopt to parse arguments; try installing the util-linux package" >&2
  exit 1
fi

# os-release file describes the system
if [[ -e "/etc/os-release" ]]; then
  source /etc/os-release
else
  echo "This script depends on the /etc/os-release file" >&2
  exit 2
fi

# The following function exits script if an unsupported distro is detected
supported_distro

# Detect the number of GPUs per node.
# grep -c prints 0 when hy-smi is missing or lists no devices, which selects
# the default below.
ngpu_per_node=$(hy-smi --showid 2>/dev/null | grep -ic "Device ID")
if [[ -z "${ngpu_per_node}" || "${ngpu_per_node}" -eq 0 ]]; then
  echo "Failed to get the number of GPUs per node via hy-smi. Defaulting to 8."
  ngpu_per_node=8
else
  echo "Detected ${ngpu_per_node} GPUs per node."
fi

# #################################################
# global variables
# #################################################
# Grab options from CMake config
rochpl_bin=@CMAKE_INSTALL_PREFIX@/bin/rochpl
mpi_bin=@MPIEXEC_EXECUTABLE@

rochpl_runscript=$(dirname "$0")/run_rochpl #assume run_rochpl is in the same location

P=1
Q=1
p=-1
q=-1
N=45312
NB=384
it=1
frac=0.3
filename=HPL.dat
inputfile=false
cmdrun=false

# Bundled third-party libraries live in ../tpl relative to the resolved
# location of this script.
tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
ompi_prefix=$tpl_dir/openmpi
ompi_lib_dir=$tpl_dir/openmpi/lib
ucx_lib_dir=$tpl_dir/ucx/lib
# Only append the previous value when it is non-empty: a trailing ':' would
# add the current directory to the loader search path.
export LD_LIBRARY_PATH="${ompi_lib_dir}:${ucx_lib_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export OPAL_PREFIX=$ompi_prefix

devices=
nodes=
tcp_iface=p14p2
ssh_port=3333

# #################################################
# MPI Args
# #################################################
# count the number of physical cores
# NOTE(review): nothing in this script reads these values and they are not
# exported — confirm whether they are still needed.
# Take the first matching lscpu line only and fall back to 1 when the value is
# missing or non-numeric, so the multiplication below can never be malformed.
num_cpu_cores=$(lscpu 2>/dev/null \
  | awk -F: '/Core\(s\) per socket/ { gsub(/[[:space:]]/, "", $2); print $2; exit }')
num_cpu_sockets=$(lscpu 2>/dev/null \
  | awk -F: '/Socket\(s\)/ { gsub(/[[:space:]]/, "", $2); print $2; exit }')
[[ "${num_cpu_cores}" =~ ^[0-9]+$ ]] || num_cpu_cores=1
[[ "${num_cpu_sockets}" =~ ^[0-9]+$ ]] || num_cpu_sockets=1
total_cpu_cores=$(( num_cpu_cores * num_cpu_sockets ))

# #################################################
# Parameter parsing
# #################################################

# check if we have a modern version of getopt that can handle whitespace and
# long parameters: the enhanced getopt reports this with exit status 4 on -T
getopt_test_status=0
getopt -T > /dev/null 2>&1 || getopt_test_status=$?
if [[ "${getopt_test_status}" -ne 4 ]]; then
  echo "Need a new version of getopt" >&2
  exit 1
fi

# Test the assignment directly so getopt's own status is what is checked.
if ! GETOPT_PARSE=$(getopt --name "${0}" \
    --longoptions NB:,it:,help,version,devices:,hosts:,port:,tcp-iface: \
    --options hP:Q:p:q:N:i:f:H: -- "$@"); then
  echo "getopt invocation failed; could not parse the command line" >&2
  exit 1
fi

# getopt emits a shell-quoted argument list; eval is the documented way to
# load it back into the positional parameters.
eval set -- "${GETOPT_PARSE}"

while true; do
  case "${1}" in
    -h|--help)
      display_help
      exit 0
      ;;
    --version)
      "${mpi_bin}" --allow-run-as-root -np 1 "${rochpl_runscript}" --version
      exit 0
      ;;
    -P)
      P=${2}
      shift 2 ;;
    -Q)
      Q=${2}
      shift 2 ;;
    -p)
      p=${2}
      shift 2 ;;
    -q)
      q=${2}
      shift 2 ;;
    -N)
      N=${2}
      cmdrun=true
      shift 2 ;;
    --NB)
      NB=${2}
      cmdrun=true
      shift 2 ;;
    --it)
      it=${2}
      shift 2 ;;
    -f)
      frac=${2}
      shift 2 ;;
    -i)
      filename=${2}
      inputfile=true
      shift 2 ;;
    --devices)
      devices=${2}
      shift 2 ;;
    -H|--hosts)
      nodes=${2}
      shift 2 ;;
    --tcp-iface)
      tcp_iface=${2}
      shift 2 ;;
    --port)
      ssh_port=${2}
      shift 2 ;;
    --)
      shift
      break ;;
    *)
      echo "Unexpected command line parameter received; aborting" >&2
      exit 1
      ;;
  esac
done

# If neither -i nor a problem size (-N/--NB) was given, default to running
# with the default input file
if [[ "${inputfile}" == false && "${cmdrun}" == false ]]; then
  inputfile=true
fi

# Validate P and Q individually: checking only their product would accept two
# negative values, and non-numeric input would abort inside the arithmetic.
if ! is_positive_int "${P}" || ! is_positive_int "${Q}"; then
  echo "Invalid MPI grid parameters; aborting" >&2
  exit 1
fi
P=$(( 10#${P} ))
Q=$(( 10#${Q} ))
np=$(( P * Q ))

# Build the run_rochpl argument list as an array so values containing spaces
# or glob characters (e.g. the input file path) are passed through intact.
rochpl_args=(-P "${P}" -Q "${Q}" -p "${p}" -q "${q}")
if [[ "${inputfile}" == true ]]; then
  rochpl_args+=(-i "${filename}")
else
  rochpl_args+=(-N "${N}" --NB "${NB}")
fi
rochpl_args+=(-f "${frac}" --it "${it}")

if [[ -n "${devices}" ]]; then
  rochpl_args+=("--devices=${devices}")
fi

# Run single-node test if --hosts is not set
if [[ -z "${nodes}" ]]; then
  echo "No compute nodes specified. Running in single-node mode."
  "${mpi_bin}" --allow-run-as-root \
    --bind-to none \
    --mca pml ucx \
    --mca osc ucx \
    --mca btl ^vader,tcp,openib,uct \
    --mca coll ^hcoll \
    -x UCX_TLS=self,sm,rocm \
    -x UCX_RNDV_SCHEME=put_zcopy \
    -x UCX_MEMTYPE_CACHE=n \
    -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
    -np "${np}" \
    "${rochpl_runscript}" "${rochpl_args[@]}"
else
  echo "Running in multi-node mode. Using nodes: ${nodes}"
  echo "Using TCP interface: ${tcp_iface}"
  echo "Using SSH port: ${ssh_port}"

  # Split the comma-separated list, dropping empty entries such as those
  # produced by "a,,b" or a trailing comma.
  IFS=',' read -ra raw_node_array <<< "${nodes}"
  node_array=()
  for node in "${raw_node_array[@]}"; do
    if [[ -n "${node}" ]]; then
      node_array+=("${node}")
    fi
  done
  if (( ${#node_array[@]} == 0 )); then
    echo "No valid host names given to --hosts; aborting" >&2
    exit 1
  fi

  # Set rank counts for hosts: every node offers one slot per detected GPU
  hosts_string=""
  for node in "${node_array[@]}"; do
    hosts_string+="${node}:${ngpu_per_node},"
  done
  hosts_string="${hosts_string%,}"
  echo "MPI hosts: ${hosts_string}"

  # Copy files to other nodes (every listed node whose name differs from the
  # local hostname)
  current_node=$(hostname)
  copyto_hosts=()
  for node in "${node_array[@]}"; do
    if [[ "${node}" != "${current_node}" ]]; then
      copyto_hosts+=("${node}")
    fi
  done

  # Copy files using rsync only if there are other nodes to copy to
  if (( ${#copyto_hosts[@]} > 0 )); then
    echo "Copying files to other nodes in parallel: ${copyto_hosts[*]}"

    # build/ and tpl/ are resolved relative to the current directory; the
    # input file is only shipped when the run actually reads one.
    sync_sources=(build tpl)
    if [[ "${inputfile}" == true ]]; then
      sync_sources+=("${filename}")
    fi

    sync_pids=()
    for node in "${copyto_hosts[@]}"; do
      rsync -az -e "ssh -p ${ssh_port}" "${sync_sources[@]}" "${node}:/workspace/" &
      sync_pids+=("$!")
    done

    # Wait on each job individually: a bare `wait` discards the exit statuses,
    # so failed copies would be reported as successful.
    failed_hosts=()
    for idx in "${!sync_pids[@]}"; do
      if ! wait "${sync_pids[idx]}"; then
        failed_hosts+=("${copyto_hosts[idx]}")
      fi
    done

    if (( ${#failed_hosts[@]} == 0 )); then
      echo "Files synchronized successfully."
    else
      # The run is still attempted, as before; the failure is only reported.
      echo "WARNING: file synchronization failed for: ${failed_hosts[*]}" >&2
    fi
  fi

  # Multi-node run
  "${mpi_bin}" --allow-run-as-root \
    --prefix "${ompi_prefix}" \
    --map-by "ppr:${ngpu_per_node}:node" --bind-to none \
    --mca pml ucx \
    --mca osc ucx \
    --mca btl ^openib \
    --mca btl_tcp_if_include "${tcp_iface}" \
    --mca plm_rsh_args "-p ${ssh_port}" \
    --mca coll_hcoll_enable 0 \
    -x UCX_TLS=self,sm,rocm,rc \
    -x UCX_RNDV_SCHEME=put_zcopy \
    -x UCX_RNDV_FRAG_MEM_TYPE=rocm \
    -x UCX_MEMTYPE_CACHE=n \
    -x HSA_FORCE_FINE_GRAIN_PCIE=1 \
    -x PATH -x LD_LIBRARY_PATH -x OPAL_PREFIX \
    -np "${np}" \
    -H "${hosts_string}" \
    "${rochpl_runscript}" "${rochpl_args[@]}"
fi