Commit b755db38 authored by one's avatar one
Browse files

Improve install-mpi.sh

parent 116ae127
#!/bin/bash #!/bin/bash
set -euo pipefail
ucx_version=1.20.0 install_prefix=/opt
ompi_version=5.0.9
with_rocm=/opt/dtk with_rocm=/opt/dtk
ompi_version=5.0.8
ucx_version=1.20.0
ucx_enable_tuning=0
ompi_prefix="${install_prefix}/mpi"
ucx_prefix="${install_prefix}/ucx"
# Print the usage summary for this script on stdout.
# Globals:   install_prefix, with_rocm, ompi_version, ucx_version (read) -
#            their current values are shown as the option defaults.
# Arguments: none
# Outputs:   nine lines of help text on stdout
display_help() {
  # One array element per output line; printf re-uses the format string for
  # every element, so each one is emitted followed by a newline.
  local -a usage_lines=(
    "OpenMPI and UCX build helper script"
    "Usage: $(basename "$0") [OPTIONS]"
    "Options:"
    "-h, --help Show this help message"
    "--prefix=<dir> Path to install location (Default: ${install_prefix})"
    "--with-rocm=<dir> Path to ROCm install (Default: ${with_rocm})"
    "--ompi-version=<version> OpenMPI version (Default: ${ompi_version})"
    "--ucx-version=<version> UCX version (Default: ${ucx_version})"
    "--ucx-tuning Enable UCX tuning"
  )
  printf '%s\n' "${usage_lines[@]}"
}
# Verify that the running distribution is one this script supports.
# prereq: ${ID} must be defined before calling
# Globals:   ID (read) - distribution identifier
# Arguments: none
# Outputs:   a diagnostic on stderr when ID is unset or not supported
# Returns:   0 when ID is supported; otherwise terminates the shell with
#            exit status 2
supported_distro() {
  # ${ID+foo} expands to "foo" whenever ID is set (even to an empty string),
  # so this test is true only for a genuinely unset ID and is safe under
  # `set -u`.
  if [ -z "${ID+foo}" ]; then
    printf 'supported_distro(): $ID must be set\n' >&2
    exit 2
  fi

  case "${ID}" in
    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
      return 0
      ;;
    *)
      # Keep this list in sync with the pattern above.
      printf 'This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky\n' >&2
      exit 2
      ;;
  esac
}
exit_with_error( ) exit_with_error() {
{
if (( $1 == 2 )); then if (( $1 == 2 )); then
# Failure in some install step # Failure in some install step
# Print some message about needed dependencies # Print some message about needed dependencies
...@@ -24,19 +57,19 @@ exit_with_error( ) ...@@ -24,19 +57,19 @@ exit_with_error( )
printf "The following package manager install command may be needed:\n" printf "The following package manager install command may be needed:\n"
case "${ID}" in case "${ID}" in
ubuntu) ubuntu)
printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n" printf "sudo apt install -y %s\n" "${library_dependencies_ubuntu[*]}"
;; ;;
centos|rhel|tencentos|kylin) centos|rhel|tencentos|kylin)
printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n" printf "sudo yum -y --nogpgcheck install %s\n" "${library_dependencies_centos[*]}"
;; ;;
fedora|rocky) fedora|rocky)
printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n" printf "sudo dnf install -y %s\n" "${library_dependencies_fedora[*]}"
;; ;;
sles) sles)
printf "sudo zypper -n --no-gpg-checks install ${library_dependencies_sles[*]}\n" printf "sudo zypper -n --no-gpg-checks install %s\n" "${library_dependencies_sles[*]}"
;; ;;
*) *)
exit 2 exit 2
...@@ -44,78 +77,61 @@ exit_with_error( ) ...@@ -44,78 +77,61 @@ exit_with_error( )
esac esac
fi fi
exit $1 exit "$1"
}
check_exit_code( )
{
if (( $? != 0 )); then
err=$1
msg=$2
if [[ "$msg" == "" ]]; then
msg="Unknown error"
fi
echo "ERROR: $msg"
exit $err
fi
} }
# Clone and build OpenMPI+UCX in /opt # Clone and build OpenMPI+UCX in /opt
install_openmpi( ) install_openmpi() {
{
# Backup old installations if they exist (overwrite existing .bak if present) # Backup old installations if they exist (overwrite existing .bak if present)
[ -d "${ompi_prefix}" ] && { [ -d "${ompi_prefix}" ] && {
rm -rf ${ompi_prefix}.bak; mv ${ompi_prefix} ${ompi_prefix}.bak; rm -rf "${ompi_prefix}.bak"; mv "${ompi_prefix}" "${ompi_prefix}.bak";
} }
[ -d "${ucx_prefix}" ] && { [ -d "${ucx_prefix}" ] && {
rm -rf ${ucx_prefix}.bak; mv ${ucx_prefix} ${ucx_prefix}.bak; rm -rf "${ucx_prefix}.bak"; mv "${ucx_prefix}" "${ucx_prefix}.bak";
} }
# OpenMPI and UCX install to one of these locations depending on OS # OpenMPI and UCX install to one of these locations depending on OS
local ucx_lib_folder=${ucx_prefix}/lib local ucx_lib_folder="${ucx_prefix}/lib"
local ompi_lib_folder=${ompi_prefix}/lib local ompi_lib_folder="${ompi_prefix}/lib"
local ucx_lib64_folder=${ucx_prefix}/lib64 local ucx_lib64_folder="${ucx_prefix}/lib64"
local ompi_lib64_folder=${ompi_prefix}/lib64 local ompi_lib64_folder="${ompi_prefix}/lib64"
local src_dir=${PWD} local src_dir="${PWD}"
local ucx_src=${src_dir}/ucx-${ucx_version} local ucx_src="${src_dir}/ucx-${ucx_version}"
local ucx_tarball=ucx-${ucx_version}.tar.gz local ucx_tarball="ucx-${ucx_version}.tar.gz"
local ompi_src=${src_dir}/openmpi-${ompi_version} local ompi_src="${src_dir}/openmpi-${ompi_version}"
local ompi_tarball=openmpi-${ompi_version}.tar.gz local ompi_tarball="openmpi-${ompi_version}.tar.gz"
# Download UCX on demand # Download UCX on demand
if [ ! -d "${ucx_src}" ]; then rm -rf "${ucx_src}"
if [ ! -f "${ucx_tarball}" ]; then if [ ! -f "${ucx_tarball}" ]; then
wget https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball} wget "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}" \
fi || { echo "Failed to download UCX tarball"; exit 1; }
check_exit_code 2
tar -zxf ${ucx_tarball}
check_exit_code 2
fi fi
tar -zxf "${ucx_tarball}"
# Download OpenMPI on demand # Download OpenMPI on demand
if [ ! -d "${ompi_src}" ]; then rm -rf "${ompi_src}"
if [ ! -f "${ompi_tarball}" ]; then if [ ! -f "${ompi_tarball}" ]; then
wget https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball} wget "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}" \
fi || { echo "Failed to download OpenMPI tarball"; exit 1; }
check_exit_code 2
tar -zxf ${ompi_tarball}
check_exit_code 2
fi fi
tar -zxf "${ompi_tarball}"
# Build UCX cd "${ucx_src}" || { echo "Failed to cd into ${ucx_src}"; exit 1; }
cd ${ucx_src}; local ucx_tuning_flag=""
./contrib/configure-release --prefix=${ucx_prefix} \ if (( ucx_enable_tuning == 1 )); then
ucx_tuning_flag="--enable-tuning"
fi
./contrib/configure-release --prefix="${ucx_prefix}" \
--enable-optimizations ${ucx_tuning_flag} \
--enable-cma --enable-mt \ --enable-cma --enable-mt \
--with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \ --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \
--with-verbs=/usr/include --with-rdmacm=/usr \ --with-verbs=/usr/include --with-rdmacm=/usr \
--with-rocm=${with_rocm} \ --with-rocm="${with_rocm}" \
--without-knem --without-cuda --without-java --without-knem --without-cuda --without-java
check_exit_code 2 make -j"$(nproc)"
make -j$(nproc)
check_exit_code 2
make install make install
check_exit_code 2
cd ..
# Check for successful build # Check for successful build
if ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \ if ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
...@@ -123,27 +139,22 @@ install_openmpi( ) ...@@ -123,27 +139,22 @@ install_openmpi( )
([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \ ([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
[ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then [ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
echo "Error: UCX install unsuccessful." echo "Error: UCX install unsuccessful."
exit 3 exit_with_error 2
fi fi
export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH}" export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH:-}"
export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH}" export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH:-}"
export CPATH="${ucx_prefix}/include:${CPATH}" export CPATH="${ucx_prefix}/include:${CPATH:-}"
# Build OpenMPI cd "${ompi_src}" || { echo "Failed to cd into ${ompi_src}"; exit 1; }
cd ${ompi_src} ./configure --prefix="${ompi_prefix}" \
./configure --prefix=${ompi_prefix} \ --with-ucx="${ucx_prefix}" \
--with-ucx=${ucx_prefix} \ --with-rocm="${with_rocm}" \
--with-rocm=${with_rocm} \
--enable-builtin-atomics \ --enable-builtin-atomics \
--enable-wrapper-rpath \ --enable-wrapper-rpath \
--without-verbs --enable-mca-no-build=btl-uct --enable-mca-no-build=btl-uct
check_exit_code 2 make -j"$(nproc)"
make -j$(nproc)
check_exit_code 2
make install make install
check_exit_code 2
cd ..
# Check for successful build # Check for successful build
if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
...@@ -152,4 +163,56 @@ install_openmpi( ) ...@@ -152,4 +163,56 @@ install_openmpi( )
fi fi
} }
# #################################################
# Pre-requisites check
# #################################################
# Exit code 0: all is well
# Exit code 1: getopt parse failure or unexpected argument
# Exit code 2: unsupported distro or install failure

# os-release file describes the system; it is expected to define ${ID},
# which supported_distro requires to be set before it is called.
# shellcheck source=/dev/null
source /etc/os-release

# The following function exits script if an unsupported distro is detected
supported_distro

# #################################################
# Parameter parsing
# #################################################
# getopt validates the options and prints them back in a canonical, quoted
# form; a non-zero status means the command line could not be parsed.
GETOPT_PARSE=$(getopt --name "${0}" --options h --longoptions help,prefix:,with-rocm:,ompi-version:,ucx-version:,ucx-tuning -- "$@") \
  || { echo "getopt invocation failed; could not parse the command line" >&2; exit_with_error 1; }

# Replace the positional parameters with getopt's normalised output. The
# eval is needed because that output is shell-quoted text.
eval set -- "${GETOPT_PARSE}"

while true; do
  case "${1}" in
    -h|--help)
      display_help
      exit 0
      ;;
    --prefix)
      # Both derived install locations follow the chosen prefix.
      install_prefix="${2}"
      ompi_prefix="${install_prefix}/mpi"
      ucx_prefix="${install_prefix}/ucx"
      shift 2
      ;;
    --with-rocm)
      with_rocm="${2}"
      shift 2
      ;;
    --ompi-version)
      ompi_version="${2}"
      shift 2
      ;;
    --ucx-version)
      ucx_version="${2}"
      shift 2
      ;;
    --ucx-tuning)
      ucx_enable_tuning=1
      shift 1
      ;;
    --)
      # End-of-options marker; anything after it is a positional argument.
      shift
      break
      ;;
    *)
      echo "Unexpected command line parameter received; aborting" >&2
      exit_with_error 1
      ;;
  esac
done

# This script takes options only. Positional arguments left over after "--"
# would otherwise be ignored silently, contradicting the exit-code contract
# above ("unexpected argument" -> exit code 1).
if (( $# > 0 )); then
  printf 'Unexpected command line parameter received: %s; aborting\n' "$*" >&2
  exit_with_error 1
fi
install_openmpi install_openmpi
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment