#!/bin/bash
#
# Download, build and install UCX and OpenMPI with ROCm support.
#
# Tarballs are fetched into (and built from) the current working directory.
# UCX is installed to <prefix>/ucx and OpenMPI to <prefix>/mpi; an existing
# installation at either location is moved aside to "<location>.bak" first.
#
# Exit codes:
#   0  success
#   1  command-line parse failure, download failure or cd failure
#   2  unsupported distro or build/install failure

set -euo pipefail

# Defaults; each can be overridden from the command line (see display_help).
install_prefix=/opt
with_rocm=/opt/dtk
ompi_version=5.0.8
ucx_version=1.20.0
ucx_enable_tuning=0

ompi_prefix="${install_prefix}/mpi"
ucx_prefix="${install_prefix}/ucx"

# Print usage information, showing the currently effective defaults.
display_help() {
    # The here-document body and its terminator must start at column 0.
    cat <<EOF
Usage: ${0} [options]
  -h, --help                 Print this help message
  --prefix=<path>            Path to install location (Default: ${install_prefix})
  --with-rocm=<path>         Path to ROCm install (Default: ${with_rocm})
  --ompi-version=<version>   OpenMPI version (Default: ${ompi_version})
  --ucx-version=<version>    UCX version (Default: ${ucx_version})
  --ucx-tuning               Enable UCX tuning
EOF
}

# Exit with status 2 unless ${ID} names a distribution this script handles.
# prereq: ${ID} must be defined before calling
supported_distro() {
    if [[ -z "${ID+foo}" ]]; then
        printf "supported_distro(): \$ID must be set\n" >&2
        exit 2
    fi

    case "${ID}" in
        debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
            true
            ;;
        *)
            printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky\n" >&2
            exit 2
            ;;
    esac
}

# Exit the script with the given status.
# Arguments: $1 - exit status. When it is 2 (failure in some install step),
#                 a package-manager command listing the build dependencies
#                 for the detected distro (${ID}) is printed first.
exit_with_error() {
    if (( $1 == 2 )); then
        # dependencies needed for executable to build
        local library_dependencies_ubuntu=(
            "git" "make" "cmake" "libnuma-dev" "pkg-config" "autoconf"
            "libtool" "automake" "m4" "flex" "libgomp1"
            "libibverbs-dev" "librdmacm-dev"
        )
        local library_dependencies_centos=(
            "git" "make" "cmake3" "gcc-c++" "rpm-build" "epel-release"
            "numactl-libs" "autoconf" "libtool" "automake" "m4" "flex"
            "libgomp" "libibverbs-devel" "librdmacm-devel"
        )
        local library_dependencies_fedora=(
            "git" "make" "cmake" "gcc-c++" "libcxx-devel" "rpm-build"
            "numactl-libs" "autoconf" "libtool" "automake" "m4" "flex"
            "libgomp" "libibverbs-devel" "librdmacm-devel"
        )
        local library_dependencies_sles=(
            "git" "make" "cmake" "gcc-c++" "libcxxtools9" "rpm-build"
            "libnuma-devel" "autoconf" "libtool" "automake" "m4" "flex"
            "libgomp1" "libibverbs-devel" "librdmacm-devel"
        )

        {
            printf "Installation failed. Some required packages may be missing.\n"
            printf "The following package manager install command may be needed:\n"
            # ${ID:-} keeps 'set -u' from masking the real error if this is
            # ever reached before /etc/os-release has been sourced.
            case "${ID:-}" in
                debian|linuxmint|ubuntu)
                    printf "sudo apt install -y %s\n" "${library_dependencies_ubuntu[*]}"
                    ;;
                centos|rhel|tencentos|kylin)
                    printf "sudo yum -y --nogpgcheck install %s\n" "${library_dependencies_centos[*]}"
                    ;;
                fedora|rocky)
                    printf "sudo dnf install -y %s\n" "${library_dependencies_fedora[*]}"
                    ;;
                sles)
                    printf "sudo zypper -n --no-gpg-checks install %s\n" "${library_dependencies_sles[*]}"
                    ;;
                *)
                    exit 2
                    ;;
            esac
        } >&2
    fi
    exit "$1"
}

# Move an existing install directory aside to "<dir>.bak", replacing any
# previous backup.
# Arguments: $1 - install directory
backup_existing_install() {
    local dir="$1"
    if [[ -d "${dir}" ]]; then
        rm -rf -- "${dir}.bak"
        mv -- "${dir}" "${dir}.bak"
    fi
}

# Download a tarball unless it is already present in the current directory.
# The download goes to "<file>.part" and is renamed only on success, so an
# interrupted transfer is never mistaken for a complete cached tarball.
# Arguments: $1 - URL, $2 - destination file name, $3 - label for messages
fetch_tarball() {
    local url="$1"
    local dest="$2"
    local label="$3"

    if [[ -f "${dest}" ]]; then
        return 0
    fi
    if ! wget -O "${dest}.part" "${url}"; then
        rm -f -- "${dest}.part"
        echo "Failed to download ${label} tarball" >&2
        exit 1
    fi
    mv -- "${dest}.part" "${dest}"
}

# Succeed only if every named file exists in the given directory.
# Arguments: $1 - directory, remaining - file names
all_libs_present() {
    local dir="$1"
    shift
    local lib
    for lib in "$@"; do
        [[ -f "${dir}/${lib}" ]] || return 1
    done
}

# Download and build UCX, then OpenMPI against it, installing into
# ${ucx_prefix} and ${ompi_prefix}.
# Globals read: ompi_prefix, ucx_prefix, ompi_version, ucx_version,
#               ucx_enable_tuning, with_rocm
# Exports:      LD_LIBRARY_PATH, LIBRARY_PATH, CPATH (prepended with UCX paths)
# Exits 1 on download/cd failure and 2 (via exit_with_error) on build failure.
install_openmpi() {
    # Backup old installations if they exist (overwrite existing .bak if present)
    backup_existing_install "${ompi_prefix}"
    backup_existing_install "${ucx_prefix}"

    local src_dir="${PWD}"
    local ucx_src="${src_dir}/ucx-${ucx_version}"
    local ucx_tarball="ucx-${ucx_version}.tar.gz"
    local ompi_src="${src_dir}/openmpi-${ompi_version}"
    local ompi_tarball="openmpi-${ompi_version}.tar.gz"

    # Download UCX on demand; always unpack into a fresh source tree
    rm -rf -- "${ucx_src}"
    fetch_tarball \
        "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}" \
        "${ucx_tarball}" "UCX"
    tar -zxf "${ucx_tarball}"

    # Download OpenMPI on demand; the release directory is named after the
    # major.minor part of the version (e.g. v5.0 for 5.0.8)
    rm -rf -- "${ompi_src}"
    fetch_tarball \
        "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}" \
        "${ompi_tarball}" "OpenMPI"
    tar -zxf "${ompi_tarball}"

    # ---- UCX ----
    cd "${ucx_src}" || { echo "Failed to cd into ${ucx_src}" >&2; exit 1; }

    local -a ucx_configure_args=(
        --prefix="${ucx_prefix}"
        --enable-optimizations
    )
    if (( ucx_enable_tuning == 1 )); then
        ucx_configure_args+=(--enable-tuning)
    fi
    ucx_configure_args+=(
        --enable-cma --enable-mt
        --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm
        --with-verbs=/usr/include --with-rdmacm=/usr
        --with-rocm="${with_rocm}"
        --without-knem --without-cuda --without-java
    )

    # Route build failures through exit_with_error so the user gets the
    # dependency hint and the documented exit code 2, instead of 'set -e'
    # aborting with the tool's raw status.
    ./contrib/configure-release "${ucx_configure_args[@]}" || exit_with_error 2
    make -j"$(nproc)" || exit_with_error 2
    make install || exit_with_error 2

    # Check for successful build: the libraries land in lib or lib64
    # depending on OS, so a complete set in either location is accepted.
    local -a ucx_libs=(libucm.so libucp.so libucs.so libuct.so)
    if ! all_libs_present "${ucx_prefix}/lib" "${ucx_libs[@]}" \
        && ! all_libs_present "${ucx_prefix}/lib64" "${ucx_libs[@]}"; then
        echo "Error: UCX install unsuccessful." >&2
        exit_with_error 2
    fi

    # Make the freshly built UCX visible to the OpenMPI build
    export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH:-}"
    export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH:-}"
    export CPATH="${ucx_prefix}/include:${CPATH:-}"

    # ---- OpenMPI ----
    cd "${ompi_src}" || { echo "Failed to cd into ${ompi_src}" >&2; exit 1; }

    ./configure --prefix="${ompi_prefix}" \
        --with-ucx="${ucx_prefix}" \
        --with-rocm="${with_rocm}" \
        --enable-builtin-atomics \
        --enable-wrapper-rpath \
        --enable-mca-no-build=btl-uct || exit_with_error 2
    make -j"$(nproc)" || exit_with_error 2
    make install || exit_with_error 2

    # Check for successful build
    if [[ ! -f "${ompi_prefix}/lib/libmpi.so" && ! -f "${ompi_prefix}/lib64/libmpi.so" ]]; then
        echo "Error: OpenMPI install unsuccessful." >&2
        exit_with_error 2
    fi
}

# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: getopt parse failure or unexpected argument
# Exit code 2: unsupported distro or install failure

# os-release file describes the system
# shellcheck disable=SC1091
source /etc/os-release

# The following function exits script if an unsupported distro is detected
supported_distro

# #################################################
# Parameter parsing
# #################################################
GETOPT_PARSE=$(getopt --name "${0}" --options h \
    --longoptions help,prefix:,with-rocm:,ompi-version:,ucx-version:,ucx-tuning -- "$@") \
    || { echo "getopt invocation failed; could not parse the command line" >&2; exit_with_error 1; }

# getopt emits a shell-quoted argument list; eval is the documented way to
# load it back into the positional parameters.
eval set -- "${GETOPT_PARSE}"

while true; do
    case "${1}" in
        -h|--help)
            display_help
            exit 0
            ;;
        --prefix)
            install_prefix="${2}"
            # Derived locations must follow the new prefix
            ompi_prefix="${install_prefix}/mpi"
            ucx_prefix="${install_prefix}/ucx"
            shift 2
            ;;
        --with-rocm)
            with_rocm="${2}"
            shift 2
            ;;
        --ompi-version)
            ompi_version="${2}"
            shift 2
            ;;
        --ucx-version)
            ucx_version="${2}"
            shift 2
            ;;
        --ucx-tuning)
            ucx_enable_tuning=1
            shift 1
            ;;
        --)
            shift
            break
            ;;
        *)
            echo "Unexpected command line parameter received; aborting" >&2
            exit_with_error 1
            ;;
    esac
done

install_openmpi