Commit b755db38 authored by one's avatar one
Browse files

Improve install-mpi.sh

parent 116ae127
#!/bin/bash
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# NOTE(review): ucx_version and ompi_version are each assigned twice in this
# section; the later assignments further down are the ones in effect.
# Confirm which ompi_version (5.0.9 here vs 5.0.8 below) is intended.
ucx_version=1.20.0
ompi_version=5.0.9
# Root directory for the installation; overridable with --prefix.
install_prefix=/opt
# ROCm location handed to the UCX and OpenMPI configure steps via --with-rocm.
with_rocm=/opt/dtk
# Release versions used to build the tarball names and download URLs.
ompi_version=5.0.8
ucx_version=1.20.0
# 0 = off; set to 1 by --ucx-tuning, which adds --enable-tuning to the UCX configure.
ucx_enable_tuning=0
# Install locations derived from install_prefix; recomputed when --prefix is given.
ompi_prefix="${install_prefix}/mpi"
ucx_prefix="${install_prefix}/ucx"
# Print the usage summary to stdout.
# Globals (read): install_prefix, with_rocm, ompi_version, ucx_version —
#   their values at call time are shown as the option defaults.
display_help() {
  printf '%s\n' \
    "OpenMPI and UCX build helper script" \
    "Usage: $(basename "$0") [OPTIONS]" \
    "Options:" \
    "-h, --help Show this help message" \
    "--prefix=<dir> Path to install location (Default: ${install_prefix})" \
    "--with-rocm=<dir> Path to ROCm install (Default: ${with_rocm})" \
    "--ompi-version=<version> OpenMPI version (Default: ${ompi_version})" \
    "--ucx-version=<version> UCX version (Default: ${ucx_version})" \
    "--ucx-tuning Enable UCX tuning"
}
install_dir=/opt
ucx_prefix=${install_dir}/ucx
ompi_prefix=${install_dir}/mpi
# Verify that the running distribution is one this script knows how to handle.
# prereq: ${ID} must be defined before calling (the script sources
#         /etc/os-release ahead of the call).
# Globals:  ID (read)
# Outputs:  a diagnostic on stderr when ID is unset or not supported
# Returns:  0 when ID is supported; otherwise exits the script with status 2
supported_distro() {
  # ${ID+set} expands to nothing only when ID is unset, which keeps this
  # check safe under `set -u`.
  if [[ -z "${ID+set}" ]]; then
    printf 'supported_distro(): $ID must be set\n' >&2
    exit 2
  fi

  case "${ID}" in
    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
      return 0
      ;;
    *)
      # Keep this list in sync with the patterns accepted above.
      printf 'This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky\n' >&2
      exit 2
      ;;
  esac
}
exit_with_error( )
{
exit_with_error() {
if (( $1 == 2 )); then
# Failure in some install step
# Print some message about needed dependencies
......@@ -24,98 +57,81 @@ exit_with_error( )
printf "The following package manager install command may be needed:\n"
case "${ID}" in
ubuntu)
printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
printf "sudo apt install -y %s\n" "${library_dependencies_ubuntu[*]}"
;;
centos|rhel|tencentos|kylin)
printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
printf "sudo yum -y --nogpgcheck install %s\n" "${library_dependencies_centos[*]}"
;;
fedora|rocky)
printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
printf "sudo dnf install -y %s\n" "${library_dependencies_fedora[*]}"
;;
sles)
printf "sudo zypper -n --no-gpg-checks install ${library_dependencies_sles[*]}\n"
printf "sudo zypper -n --no-gpg-checks install %s\n" "${library_dependencies_sles[*]}"
;;
*)
*)
exit 2
;;
esac
fi
exit $1
}
fi
check_exit_code( )
{
if (( $? != 0 )); then
err=$1
msg=$2
if [[ "$msg" == "" ]]; then
msg="Unknown error"
fi
echo "ERROR: $msg"
exit $err
fi
exit "$1"
}
# Download, build and install UCX and OpenMPI under ${install_prefix} (default: /opt)
install_openmpi( )
{
install_openmpi() {
# Backup old installations if they exist (overwrite existing .bak if present)
[ -d "${ompi_prefix}" ] && {
rm -rf ${ompi_prefix}.bak; mv ${ompi_prefix} ${ompi_prefix}.bak;
rm -rf "${ompi_prefix}.bak"; mv "${ompi_prefix}" "${ompi_prefix}.bak";
}
[ -d "${ucx_prefix}" ] && {
rm -rf ${ucx_prefix}.bak; mv ${ucx_prefix} ${ucx_prefix}.bak;
rm -rf "${ucx_prefix}.bak"; mv "${ucx_prefix}" "${ucx_prefix}.bak";
}
# OpenMPI and UCX install to one of these locations depending on OS
local ucx_lib_folder=${ucx_prefix}/lib
local ompi_lib_folder=${ompi_prefix}/lib
local ucx_lib64_folder=${ucx_prefix}/lib64
local ompi_lib64_folder=${ompi_prefix}/lib64
local ucx_lib_folder="${ucx_prefix}/lib"
local ompi_lib_folder="${ompi_prefix}/lib"
local ucx_lib64_folder="${ucx_prefix}/lib64"
local ompi_lib64_folder="${ompi_prefix}/lib64"
local src_dir=${PWD}
local ucx_src=${src_dir}/ucx-${ucx_version}
local ucx_tarball=ucx-${ucx_version}.tar.gz
local ompi_src=${src_dir}/openmpi-${ompi_version}
local ompi_tarball=openmpi-${ompi_version}.tar.gz
local src_dir="${PWD}"
local ucx_src="${src_dir}/ucx-${ucx_version}"
local ucx_tarball="ucx-${ucx_version}.tar.gz"
local ompi_src="${src_dir}/openmpi-${ompi_version}"
local ompi_tarball="openmpi-${ompi_version}.tar.gz"
# Download UCX on demand
if [ ! -d "${ucx_src}" ]; then
if [ ! -f "${ucx_tarball}" ]; then
wget https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}
fi
check_exit_code 2
tar -zxf ${ucx_tarball}
check_exit_code 2
rm -rf "${ucx_src}"
if [ ! -f "${ucx_tarball}" ]; then
wget "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}" \
|| { echo "Failed to download UCX tarball"; exit 1; }
fi
tar -zxf "${ucx_tarball}"
# Download OpenMPI on demand
if [ ! -d "${ompi_src}" ]; then
if [ ! -f "${ompi_tarball}" ]; then
wget https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}
fi
check_exit_code 2
tar -zxf ${ompi_tarball}
check_exit_code 2
rm -rf "${ompi_src}"
if [ ! -f "${ompi_tarball}" ]; then
wget "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}" \
|| { echo "Failed to download OpenMPI tarball"; exit 1; }
fi
tar -zxf "${ompi_tarball}"
# Build UCX
cd ${ucx_src};
./contrib/configure-release --prefix=${ucx_prefix} \
cd "${ucx_src}" || { echo "Failed to cd into ${ucx_src}"; exit 1; }
local ucx_tuning_flag=""
if (( ucx_enable_tuning == 1 )); then
ucx_tuning_flag="--enable-tuning"
fi
./contrib/configure-release --prefix="${ucx_prefix}" \
--enable-optimizations ${ucx_tuning_flag} \
--enable-cma --enable-mt \
--with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \
--with-verbs=/usr/include --with-rdmacm=/usr \
--with-rocm=${with_rocm} \
--with-rocm="${with_rocm}" \
--without-knem --without-cuda --without-java
check_exit_code 2
make -j$(nproc)
check_exit_code 2
make -j"$(nproc)"
make install
check_exit_code 2
cd ..
# Check for successful build
if ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
......@@ -123,27 +139,22 @@ install_openmpi( )
([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
[ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
echo "Error: UCX install unsuccessful."
exit 3
exit_with_error 2
fi
export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH}"
export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH}"
export CPATH="${ucx_prefix}/include:${CPATH}"
export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH:-}"
export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH:-}"
export CPATH="${ucx_prefix}/include:${CPATH:-}"
# Build OpenMPI
cd ${ompi_src}
./configure --prefix=${ompi_prefix} \
--with-ucx=${ucx_prefix} \
--with-rocm=${with_rocm} \
cd "${ompi_src}" || { echo "Failed to cd into ${ompi_src}"; exit 1; }
./configure --prefix="${ompi_prefix}" \
--with-ucx="${ucx_prefix}" \
--with-rocm="${with_rocm}" \
--enable-builtin-atomics \
--enable-wrapper-rpath \
--without-verbs --enable-mca-no-build=btl-uct
check_exit_code 2
make -j$(nproc)
check_exit_code 2
--enable-mca-no-build=btl-uct
make -j"$(nproc)"
make install
check_exit_code 2
cd ..
# Check for successful build
if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
......@@ -152,4 +163,56 @@ install_openmpi( )
fi
}
install_openmpi
\ No newline at end of file
# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: getopt parse failure or unexpected argument
# Exit code 2: unsupported distro or install failure

# The os-release file describes the system; it is expected to define ${ID},
# which supported_distro inspects.
# shellcheck source=/dev/null
source /etc/os-release

# The following function exits the script if an unsupported distro is detected
supported_distro

# #################################################
# Parameter parsing
# #################################################
# getopt validates the options and prints a normalized, shell-quoted argument
# list. A non-zero status means the command line could not be parsed.
GETOPT_PARSE=$(getopt --name "${0}" --options h \
  --longoptions help,prefix:,with-rocm:,ompi-version:,ucx-version:,ucx-tuning \
  -- "$@") \
  || { echo "getopt invocation failed; could not parse the command line" >&2; exit_with_error 1; }

# eval is required so the quoting produced by getopt is re-parsed and option
# values containing whitespace stay intact as single positional parameters.
eval set -- "${GETOPT_PARSE}"

while true; do
  case "${1}" in
    -h|--help)
      display_help
      exit 0
      ;;
    --prefix)
      install_prefix="${2}"
      # The per-package prefixes are derived values; recompute them.
      ompi_prefix="${install_prefix}/mpi"
      ucx_prefix="${install_prefix}/ucx"
      shift 2
      ;;
    --with-rocm)
      with_rocm="${2}"
      shift 2
      ;;
    --ompi-version)
      ompi_version="${2}"
      shift 2
      ;;
    --ucx-version)
      ucx_version="${2}"
      shift 2
      ;;
    --ucx-tuning)
      ucx_enable_tuning=1
      shift 1
      ;;
    --)
      # End-of-options marker that getopt always appends.
      shift
      break
      ;;
    *)
      echo "Unexpected command line parameter received; aborting" >&2
      exit_with_error 1
      ;;
  esac
done

# Anything left after "--" is a positional argument. The script accepts none,
# so reject them instead of silently ignoring a likely typo.
if (( $# > 0 )); then
  echo "Unexpected command line parameter received; aborting" >&2
  exit_with_error 1
fi

install_openmpi
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment