#!/bin/bash
#
# Download, build and install UCX and a UCX-enabled OpenMPI with ROCm support.
# Run with --help for the list of supported options.
set -euo pipefail

# Default settings; each one can be overridden from the command line.
install_prefix=/opt     # root directory that receives the ucx/ and mpi/ trees
with_rocm=/opt/dtk      # ROCm installation handed to both configure scripts
ompi_version=5.0.9
ucx_version=1.20.0
ucx_enable_tuning=0     # switched to 1 by --ucx-tuning

# Derived install locations; recomputed when --prefix is given.
ompi_prefix="${install_prefix}/mpi"
ucx_prefix="${install_prefix}/ucx"

# Print the usage summary to stdout.
# Globals: install_prefix, with_rocm, ompi_version, ucx_version (read) - shown
#          as the current default of the matching option.
display_help() {
  printf '%s\n' \
    "OpenMPI and UCX build helper script" \
    "Usage: $(basename "$0") [OPTIONS]" \
    "" \
    "Options:" \
    "  -h, --help                Show this help message" \
    "  --prefix=<dir>            Path to install location (Default: ${install_prefix})" \
    "  --with-rocm=<dir>         Path to ROCm install (Default: ${with_rocm})" \
    "  --ompi-version=<version>  OpenMPI version (Default: ${ompi_version})" \
    "  --ucx-version=<version>   UCX version (Default: ${ucx_version})" \
    "  --ucx-tuning              Enable UCX tuning"
}
# Abort unless the running distribution is one this script knows how to handle.
# Globals: ID (read) - distribution identifier; must be set before calling
#          (the script obtains it by sourcing /etc/os-release).
# Outputs: an error message on stderr when the check fails.
# Exits:   status 2 if ID is unset or names an unsupported distribution.
supported_distro() {
  # ${ID+foo} expands to "foo" only when ID is set, so this detects "unset"
  # without tripping `set -u`.
  if [ -z "${ID+foo}" ]; then
    printf "supported_distro(): \$ID must be set\n" >&2
    exit 2
  fi

  case "${ID}" in
    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
        true
        ;;
    *)  # Keep this list in sync with the pattern above.
        printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora, SLES, TencentOS, Kylin and Rocky\n" >&2
        exit 2
        ;;
  esac
}
# Terminate the script with the given exit status.
# For status 2 (a failed install step) a hint is printed first, listing the
# packages the build needs and the install command for the detected distro.
# Arguments: $1 - exit status (defaults to 1 when omitted)
# Globals:   ID (read) - distribution identifier used to pick the package list
# Outputs:   the hint, on stderr
exit_with_error() {
  local exit_code="${1:-1}"

  if [[ "${exit_code}" == 2 ]]; then
    # Failure in some install step
    # Print some message about needed dependencies

    # dependencies needed for executable to build
    local library_dependencies_ubuntu=(
      "git" "make" "cmake" "libnuma-dev" "pkg-config" "autoconf" "libtool"
      "automake" "m4" "flex" "libgomp1" "libibverbs-dev" "librdmacm-dev"
    )
    local library_dependencies_centos=(
      "git" "make" "cmake3" "gcc-c++" "rpm-build" "epel-release" "numactl-libs"
      "autoconf" "libtool" "automake" "m4" "flex" "libgomp" "libibverbs-devel"
      "librdmacm-devel"
    )
    local library_dependencies_fedora=(
      "git" "make" "cmake" "gcc-c++" "libcxx-devel" "rpm-build" "numactl-libs"
      "autoconf" "libtool" "automake" "m4" "flex" "libgomp" "libibverbs-devel"
      "librdmacm-devel"
    )
    local library_dependencies_sles=(
      "git" "make" "cmake" "gcc-c++" "libcxxtools9" "rpm-build" "libnuma-devel"
      "autoconf" "libtool" "automake" "m4" "flex" "libgomp1" "libibverbs-devel"
      "librdmacm-devel"
    )

    # "${array[*]}" joins the package names into one space-separated string.
    {
      printf "Installation failed. Some required packages may be missing.\n"
      printf "The following package manager install command may be needed:\n"
      # ${ID:-} keeps `set -u` from aborting here if ID was never set.
      case "${ID:-}" in
        debian|linuxmint|ubuntu)
          # All apt-based distros accepted by supported_distro share this list.
          printf "sudo apt install -y %s\n" "${library_dependencies_ubuntu[*]}"
          ;;

        centos|rhel|tencentos|kylin)
          printf "sudo yum -y --nogpgcheck install %s\n" "${library_dependencies_centos[*]}"
          ;;

        fedora|rocky)
          printf "sudo dnf install -y %s\n" "${library_dependencies_fedora[*]}"
          ;;

        sles)
          printf "sudo zypper -n --no-gpg-checks install %s\n" "${library_dependencies_sles[*]}"
          ;;

        *)
          printf "(no package list is known for distribution '%s')\n" "${ID:-unknown}"
          ;;
      esac
    } >&2
  fi

  exit "${exit_code}"
}

# Download a release tarball unless it already exists in the current directory.
# The file is fetched under a temporary ".part" name and renamed only after
# wget succeeds, so an interrupted transfer is never mistaken for a complete
# tarball on the next run.
# Arguments: $1 - URL, $2 - tarball file name, $3 - component name for messages
# Exits:     status 1 if the download fails
_fetch_tarball() {
  local url="$1"
  local tarball="$2"
  local label="$3"

  if [ -f "${tarball}" ]; then
    return 0
  fi
  if ! wget -O "${tarball}.part" "${url}"; then
    rm -f "${tarball}.part"
    echo "Failed to download ${label} tarball" >&2
    exit 1
  fi
  mv "${tarball}.part" "${tarball}"
}

# Run one configure/make step; on failure report it and leave through
# exit_with_error 2 so the missing-dependency hint is printed.
# Arguments: $1 - step description, remaining arguments - command to run
_run_build_step() {
  local description="$1"
  shift
  if ! "$@"; then
    echo "Error: ${description} failed." >&2
    exit_with_error 2
  fi
}

# Succeed only if every named file exists in the given directory.
# Arguments: $1 - directory, remaining arguments - file names
_all_files_exist() {
  local dir="$1"
  shift
  local name
  for name in "$@"; do
    [ -f "${dir}/${name}" ] || return 1
  done
  return 0
}

# Download, build and install UCX, then OpenMPI configured against that UCX.
# Sources are unpacked and built in the current working directory.
# Globals: ucx_prefix, ompi_prefix (read) - install locations
#          ucx_version, ompi_version (read) - release versions to fetch
#          with_rocm (read) - ROCm path passed to both configure scripts
#          ucx_enable_tuning (read) - 1 adds --enable-tuning to the UCX build
#          LD_LIBRARY_PATH, LIBRARY_PATH, CPATH (exported, UCX paths prepended)
# Exits:   status 1 on download, extraction or cd failure; status 2 (through
#          exit_with_error) when a build step fails or the expected libraries
#          are missing afterwards.
install_openmpi() {
  # Backup old installations if they exist (overwrite existing .bak if present)
  local old_install
  for old_install in "${ompi_prefix}" "${ucx_prefix}"; do
    if [ -d "${old_install}" ]; then
      rm -rf "${old_install}.bak"
      mv "${old_install}" "${old_install}.bak"
    fi
  done

  local src_dir="${PWD}"
  local ucx_src="${src_dir}/ucx-${ucx_version}"
  local ucx_tarball="ucx-${ucx_version}.tar.gz"
  local ompi_src="${src_dir}/openmpi-${ompi_version}"
  local ompi_tarball="openmpi-${ompi_version}.tar.gz"

  # Download UCX on demand; always start from a freshly extracted tree.
  rm -rf "${ucx_src}"
  _fetch_tarball \
    "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}" \
    "${ucx_tarball}" "UCX"
  tar -zxf "${ucx_tarball}" \
    || { echo "Failed to extract ${ucx_tarball}" >&2; exit 1; }

  # Download OpenMPI on demand. The release directory is named after the
  # version with its last component stripped (e.g. 5.0.9 -> v5.0).
  rm -rf "${ompi_src}"
  _fetch_tarball \
    "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}" \
    "${ompi_tarball}" "OpenMPI"
  tar -zxf "${ompi_tarball}" \
    || { echo "Failed to extract ${ompi_tarball}" >&2; exit 1; }

  # ---- UCX ----
  cd "${ucx_src}" || { echo "Failed to cd into ${ucx_src}" >&2; exit 1; }

  # Build the argument list as an array so the optional flag needs no
  # unquoted expansion.
  local -a ucx_configure_args=(
    --prefix="${ucx_prefix}"
    --enable-optimizations
  )
  if (( ucx_enable_tuning == 1 )); then
    ucx_configure_args+=( --enable-tuning )
  fi
  ucx_configure_args+=(
    --enable-cma --enable-mt
    --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm
    --with-verbs=/usr/include --with-rdmacm=/usr
    --with-rocm="${with_rocm}"
    --without-knem --without-cuda --without-java
  )
  _run_build_step "UCX configure" ./contrib/configure-release "${ucx_configure_args[@]}"
  _run_build_step "UCX build" make -j"$(nproc)"
  _run_build_step "UCX install" make install

  # Check for successful build. Libraries land in lib or lib64 depending on
  # the OS, so a complete set in either directory counts as success.
  local -a ucx_libs=( libucm.so libucp.so libucs.so libuct.so )
  if ! _all_files_exist "${ucx_prefix}/lib" "${ucx_libs[@]}" \
     && ! _all_files_exist "${ucx_prefix}/lib64" "${ucx_libs[@]}"; then
    echo "Error: UCX install unsuccessful." >&2
    exit_with_error 2
  fi

  # Put the fresh UCX first on the search paths for the OpenMPI build.
  export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LD_LIBRARY_PATH:-}"
  export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64:${LIBRARY_PATH:-}"
  export CPATH="${ucx_prefix}/include:${CPATH:-}"

  # ---- OpenMPI ----
  cd "${ompi_src}" || { echo "Failed to cd into ${ompi_src}" >&2; exit 1; }

  local -a ompi_configure_args=(
    --prefix="${ompi_prefix}"
    --with-ucx="${ucx_prefix}"
    --with-rocm="${with_rocm}"
    --enable-builtin-atomics
    --enable-wrapper-rpath
    --enable-mca-no-build=btl-uct
  )
  _run_build_step "OpenMPI configure" ./configure "${ompi_configure_args[@]}"
  _run_build_step "OpenMPI build" make -j"$(nproc)"
  _run_build_step "OpenMPI install" make install

  # Check for successful build
  if [ ! -f "${ompi_prefix}/lib/libmpi.so" ] && [ ! -f "${ompi_prefix}/lib64/libmpi.so" ]; then
    echo "Error: OpenMPI install unsuccessful." >&2
    exit_with_error 2
  fi
}

# #################################################
# Exit codes
# #################################################
# Exit code 0: all is well
# Exit code 1: getopt parse failure or unexpected argument
# Exit code 2: unsupported distro or install failure

# #################################################
# Parameter parsing
# #################################################
# Done before the distro check so that --help and usage errors behave the same
# on every system.

GETOPT_PARSE=$(getopt --name "${0}" --options h --longoptions help,prefix:,with-rocm:,ompi-version:,ucx-version:,ucx-tuning -- "$@") \
  || { echo "getopt invocation failed; could not parse the command line" >&2; exit_with_error 1; }

# getopt prints a shell-quoted argument list; eval re-splits it into "$@".
eval set -- "${GETOPT_PARSE}"

while true; do
  case "${1}" in
    -h|--help)
        display_help
        exit 0
        ;;
    --prefix)
        install_prefix="${2}"
        # Both install locations are derived from the prefix.
        ompi_prefix="${install_prefix}/mpi"
        ucx_prefix="${install_prefix}/ucx"
        shift 2 ;;
    --with-rocm)
        with_rocm="${2}"
        shift 2 ;;
    --ompi-version)
        ompi_version="${2}"
        shift 2 ;;
    --ucx-version)
        ucx_version="${2}"
        shift 2 ;;
    --ucx-tuning)
        ucx_enable_tuning=1
        shift 1 ;;
    --) shift ; break ;;
    *)  echo "Unexpected command line parameter received; aborting" >&2
        exit_with_error 1
        ;;
  esac
done

# Whatever follows "--" is a positional argument; the script accepts none.
if (( $# > 0 )); then
  echo "Unexpected command line parameter received; aborting" >&2
  exit_with_error 1
fi

# #################################################
# Pre-requisites check
# #################################################

# os-release file describes the system
if [ ! -r /etc/os-release ]; then
  echo "Cannot read /etc/os-release; unable to detect the distribution" >&2
  exit 2
fi
source /etc/os-release

# The following function exits script if an unsupported distro is detected
supported_distro

install_openmpi