#!/bin/bash
#
# Build and install UCX and OpenMPI (with ROCm/DTK support) from release
# tarballs.
#
# Sources are downloaded and unpacked into ./ucx and ./openmpi relative to the
# current working directory; the results are installed under ${install_dir}.
# Existing installations are moved aside to <prefix>.bak first.
#
# Exit codes:
#   0 - both UCX and OpenMPI are installed
#   2 - a download/extract/configure/build/install step failed, or the
#       OpenMPI libraries are missing afterwards (a dependency hint is printed)
#   3 - the UCX libraries are missing after the build

install_dir=/opt
with_rocm=/opt/dtk
ucx_version=1.20.0
ompi_version=5.0.8
ompi_prefix=${install_dir}/mpi
ucx_prefix=${install_dir}/ucx

#######################################
# Terminate the script with the given exit code. For code 2 (a failed install
# step) a distro-specific hint listing the packages needed for the build is
# printed first.
# Globals:   ID (read; falls back to the ID field of /etc/os-release)
# Arguments: $1 - exit code (defaults to 1 when omitted)
# Outputs:   dependency hint on stderr when $1 is 2
#######################################
exit_with_error() {
  local code=${1:-1}

  if (( code == 2 )); then
    # Packages needed for the build, per distribution family.
    local -a deps_ubuntu=(
      git make cmake libnuma-dev pkg-config autoconf libtool automake m4 flex
      libgomp1 libibverbs-dev librdmacm-dev
    )
    local -a deps_centos=(
      git make cmake3 gcc-c++ rpm-build epel-release numactl-libs autoconf
      libtool automake m4 flex libgomp libibverbs-devel librdmacm-devel
    )
    local -a deps_fedora=(
      git make cmake gcc-c++ libcxx-devel rpm-build numactl-libs autoconf
      libtool automake m4 flex libgomp libibverbs-devel librdmacm-devel
    )
    local -a deps_sles=(
      git make cmake gcc-c++ libcxxtools9 rpm-build libnuma-devel autoconf
      libtool automake m4 flex libgomp1 libibverbs-devel librdmacm-devel
    )

    # ID is normally defined by /etc/os-release, which nothing in this script
    # sources. Read it in a subshell so the other os-release variables (NAME,
    # VERSION, ...) do not leak into this shell.
    local os_id=${ID:-}
    if [[ -z "${os_id}" && -r /etc/os-release ]]; then
      os_id=$(. /etc/os-release && printf '%s' "${ID:-}")
    fi

    {
      printf 'Installation failed. Some required packages may be missing.\n'
      printf 'The following package manager install command may be needed:\n'
      case "${os_id}" in
        ubuntu)
          printf 'sudo apt install -y %s\n' "${deps_ubuntu[*]}"
          ;;
        centos|rhel|tencentos|kylin)
          printf 'sudo yum -y --nogpgcheck install %s\n' "${deps_centos[*]}"
          ;;
        fedora|rocky)
          printf 'sudo dnf install -y %s\n' "${deps_fedora[*]}"
          ;;
        sles)
          printf 'sudo zypper -n --no-gpg-checks install %s\n' "${deps_sles[*]}"
          ;;
        *)
          # Unknown distribution: no specific command to suggest.
          ;;
      esac
    } >&2
  fi

  exit "${code}"
}

#######################################
# Abort the script if the command that ran immediately before this call
# failed. Must be invoked directly after the command being checked, because
# it inspects $?.
# Arguments: $1 - exit code to terminate with
#            $2 - optional error message (default: "Unknown error")
# Outputs:   "ERROR: <msg>" on stderr, plus the dependency hint for code 2
#######################################
check_exit_code() {
  local status=$?  # must stay first: any other command would overwrite $?
  local err=${1:-1}
  local msg=${2:-Unknown error}

  if (( status != 0 )); then
    printf 'ERROR: %s\n' "${msg}" >&2
    exit_with_error "${err}"
  fi
}

# Succeeds when all four UCX shared libraries are present in lib/ or in lib64/
# (the library directory depends on the OS).
ucx_is_installed() {
  local dir lib complete
  for dir in "${ucx_prefix}/lib" "${ucx_prefix}/lib64"; do
    complete=1
    for lib in libucm.so libucp.so libucs.so libuct.so; do
      if [[ ! -f "${dir}/${lib}" ]]; then
        complete=0
        break
      fi
    done
    if (( complete )); then
      return 0
    fi
  done
  return 1
}

# Succeeds when libmpi.so is present in lib/ or lib64/ of the OpenMPI prefix.
ompi_is_installed() {
  [[ -f "${ompi_prefix}/lib/libmpi.so" || -f "${ompi_prefix}/lib64/libmpi.so" ]]
}

# Download <unpacked>.tar.gz from URL $3 (unless the tarball is already in the
# current directory), extract it and rename the unpacked tree $2 to $1.
fetch_source() {
  local dest=$1 unpacked=$2 url=$3
  local tarball=${unpacked}.tar.gz

  if [[ ! -f "${tarball}" ]]; then
    # Download under a temporary name so that an interrupted transfer is not
    # mistaken for a complete tarball on the next run.
    wget -O "${tarball}.part" "${url}" \
      && mv -- "${tarball}.part" "${tarball}"
    check_exit_code 2 "failed to download ${url}"
  fi

  tar -zxf "${tarball}"
  check_exit_code 2 "failed to extract ${tarball}"
  mv -- "${unpacked}" "${dest}"
  check_exit_code 2 "failed to rename ${unpacked} to ${dest}"
}

# Enter source directory $1, run the configure command given by the remaining
# arguments, then "make" and "make install", and return to the parent directory.
build_and_install() {
  local src_dir=$1
  shift

  cd "${src_dir}"
  check_exit_code 2 "cannot enter ${src_dir}"
  "$@"
  check_exit_code 2 "configure failed in ${src_dir}"
  make -j"$(nproc)"
  check_exit_code 2 "make failed in ${src_dir}"
  make install
  check_exit_code 2 "make install failed in ${src_dir}"
  cd ..
}

#######################################
# Download (if needed), build and install UCX and then OpenMPI under
# ${install_dir}.
# Globals:   ucx_prefix, ompi_prefix, ucx_version, ompi_version, with_rocm
#            (read); LD_LIBRARY_PATH, LIBRARY_PATH, CPATH (extended, exported)
# Arguments: none
# Exits:     2 or 3 on failure (see the header of this file)
#######################################
install_openmpi() {
  # Back up old installations if they exist (replacing an earlier .bak).
  local prefix
  for prefix in "${ompi_prefix}" "${ucx_prefix}"; do
    if [[ -d "${prefix}" ]]; then
      rm -rf -- "${prefix}.bak"
      mv -- "${prefix}" "${prefix}.bak"
    fi
  done

  # Configure commands are kept as arrays so each option stays one word.
  local -a ucx_configure=(
    ./contrib/configure-release "--prefix=${ucx_prefix}"
    --enable-cma --enable-mt
    --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm
    --with-verbs=/usr/include --with-rdmacm=/usr
    "--with-rocm=${with_rocm}"
    --without-knem --without-cuda --without-java
  )

  if [[ ! -d ./ucx ]]; then
    local ucx_src=ucx-${ucx_version}
    fetch_source ucx "${ucx_src}" \
      "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_src}.tar.gz"
    build_and_install ucx "${ucx_configure[@]}"
  elif ! ucx_is_installed; then
    # Source tree already present but nothing installed: just rebuild.
    build_and_install ucx "${ucx_configure[@]}"
  fi

  # Check for successful build
  if ! ucx_is_installed; then
    printf 'Error: UCX install unsuccessful.\n' >&2
    exit_with_error 3
  fi

  # Make the freshly installed UCX visible to the OpenMPI build. The
  # ${VAR:+:...} form avoids a trailing ':' when the variable was empty; an
  # empty path element would add the current directory to the search path.
  export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
  export CPATH="${ucx_prefix}/include${CPATH:+:${CPATH}}"

  local -a ompi_configure=(
    ./configure "--prefix=${ompi_prefix}"
    "--with-ucx=${ucx_prefix}"
    "--with-rocm=${with_rocm}"
    --enable-builtin-atomics
    --enable-wrapper-rpath
    --without-verbs --enable-mca-no-build=btl-uct
  )

  if [[ ! -d ./openmpi ]]; then
    local ompi_src=openmpi-${ompi_version}
    # Release directories are named after the major.minor series, e.g. v5.0.
    fetch_source openmpi "${ompi_src}" \
      "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_src}.tar.gz"
    build_and_install openmpi "${ompi_configure[@]}"
  elif ! ompi_is_installed; then
    build_and_install openmpi "${ompi_configure[@]}"
  fi

  # Check for successful build
  if ! ompi_is_installed; then
    printf 'Error: OpenMPI install unsuccessful.\n' >&2
    exit_with_error 2
  fi
}

install_openmpi