#!/bin/bash
#
# install-mpi.sh
#
# Builds UCX and Open MPI from their release tarballs (downloaded into the
# current directory when not already present) and installs them under
# ${install_dir}/ucx and ${install_dir}/mpi.

# --- Build configuration ----------------------------------------------------

# Root directory that receives both installations.
install_dir="/opt"
# Toolkit root handed to both configure scripts through --with-rocm.
with_rocm="/opt/dtk"

# Release versions to download and build.
ucx_version="1.20.0"
ompi_version="5.0.8"

# Install prefixes derived from the root directory.
ucx_prefix="${install_dir}/ucx"
ompi_prefix="${install_dir}/mpi"

#######################################
# Terminate the script with the given status. For status 2 (a failed install
# step) a hint is printed first, naming the packages the build needs as an
# install command for the detected distribution's package manager.
# Globals:
#   ID               (read) distribution id. When it is unset or empty the id
#                    is looked up in the os-release file instead.
#   OS_RELEASE_FILE  (read, optional) path of the os-release file; defaults
#                    to /etc/os-release.
# Arguments:
#   $1 - exit status (defaults to 1 when omitted)
# Outputs:
#   The hint text on stdout when $1 is 2; nothing otherwise.
# Returns:
#   Never returns; always exits with $1.
#######################################
exit_with_error( )
{
  local status=${1:-1}

  if (( status == 2 )); then
    # Failure in some install step
    # Print some message about needed dependencies

    # dependencies needed for executable to build
    local library_dependencies_ubuntu=( "git" "make" "cmake" "libnuma-dev" "pkg-config" "autoconf" "libtool" "automake" "m4" "flex" "libgomp1" "libibverbs-dev" "librdmacm-dev" )
    local library_dependencies_centos=( "git" "make" "cmake3" "gcc-c++" "rpm-build" "epel-release" "numactl-libs" "autoconf" "libtool" "automake" "m4" "flex" "libgomp" "libibverbs-devel" "librdmacm-devel" )
    local library_dependencies_fedora=( "git" "make" "cmake" "gcc-c++" "libcxx-devel" "rpm-build" "numactl-libs" "autoconf" "libtool" "automake" "m4" "flex" "libgomp" "libibverbs-devel" "librdmacm-devel" )
    local library_dependencies_sles=( "git" "make" "cmake" "gcc-c++" "libcxxtools9" "rpm-build" "libnuma-devel" "autoconf" "libtool" "automake" "m4" "flex" "libgomp1" "libibverbs-devel" "librdmacm-devel" )

    # Nothing else in this script sets ID, so without this lookup the case
    # below could never match. The file is sourced in a subshell so that its
    # other assignments (NAME, VERSION, ...) do not leak into this shell.
    local os_id=${ID:-}
    local os_release=${OS_RELEASE_FILE:-/etc/os-release}
    if [[ -z "${os_id}" && -r "${os_release}" ]]; then
      os_id=$( . "${os_release}" >/dev/null 2>&1; printf '%s' "${ID:-}" )
    fi

    local install_cmd=""
    case "${os_id}" in
      ubuntu)
        install_cmd="sudo apt install -y ${library_dependencies_ubuntu[*]}"
        ;;

      centos|rhel|tencentos|kylin)
        install_cmd="sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}"
        ;;

      fedora|rocky)
        install_cmd="sudo dnf install -y ${library_dependencies_fedora[*]}"
        ;;

      sles)
        install_cmd="sudo zypper -n --no-gpg-checks install ${library_dependencies_sles[*]}"
        ;;
    esac

    printf 'Installation failed. Some required packages may be missing.\n'
    printf 'The following package manager install command may be needed:\n'
    if [[ -n "${install_cmd}" ]]; then
      # Data goes through %s rather than into the format string.
      printf '%s\n' "${install_cmd}"
    else
      printf '(no package list is known for distribution "%s")\n' "${os_id:-unknown}"
    fi
  fi

  exit "${status}"
}

#######################################
# Abort the script if the command that ran immediately before this call
# failed. Does nothing when that command succeeded.
# Arguments:
#   $1 - exit status to terminate with (defaults to the failed command's
#        own status, so a failure can never be reported with exit 0)
#   $2 - message to print (defaults to "Unknown error")
# Outputs:
#   "ERROR: <message>" on stdout when the previous command failed.
# Returns:
#   0 when the previous command succeeded; otherwise exits the shell.
#######################################
check_exit_code( )
{
  # Must stay the first command: anything executed earlier would replace $?.
  local last_status=$?

  if (( last_status != 0 )); then
    local err=${1:-${last_status}}
    local msg=${2:-Unknown error}
    echo "ERROR: $msg"
    exit "${err}"
  fi
}

# Clone and build OpenMPI+UCX in /opt
#
# The functions whose names start with an underscore are private helpers of
# install_openmpi.

# Move an existing install prefix ($1) aside to "$1.bak", replacing any older
# backup. Best effort: a failure is reported on stderr but does not abort.
_backup_prefix( )
{
  local prefix=$1

  [[ -d "${prefix}" ]] || return 0
  rm -rf -- "${prefix}.bak"
  mv -- "${prefix}" "${prefix}.bak" \
    || echo "WARNING: could not move ${prefix} to ${prefix}.bak" >&2
  return 0
}

# Succeed when all four UCX libraries are present together in either
# ${ucx_prefix}/lib or ${ucx_prefix}/lib64 (the location depends on the OS).
_ucx_installed( )
{
  local lib_dir lib

  for lib_dir in "${ucx_prefix}/lib" "${ucx_prefix}/lib64"; do
    for lib in libucm libucp libucs libuct; do
      # One missing library disqualifies this directory; try the next one.
      [[ -f "${lib_dir}/${lib}.so" ]] || continue 2
    done
    return 0
  done
  return 1
}

# Succeed when libmpi.so is present in ${ompi_prefix}/lib or /lib64.
_ompi_installed( )
{
  [[ -f "${ompi_prefix}/lib/libmpi.so" || -f "${ompi_prefix}/lib64/libmpi.so" ]]
}

# Download (unless the tarball is already in the current directory) and unpack
# a release, then rename the unpacked tree.
#   $1 - directory name the source tree should end up with
#   $2 - name of the unpacked tree; the tarball is "$2.tar.gz"
#   $3 - download URL of the tarball
# Exits with status 2 through check_exit_code when any step fails.
_fetch_source( )
{
  local dest=$1
  local src=$2
  local url=$3
  local tarball="${src}.tar.gz"

  if [[ ! -f "${tarball}" ]]; then
    # Download under a temporary name so that an interrupted transfer is not
    # mistaken for a complete tarball on the next run.
    wget -O "${tarball}.part" "${url}" && mv -- "${tarball}.part" "${tarball}"
  fi
  check_exit_code 2 "could not download ${url}"
  tar -zxf "${tarball}"
  check_exit_code 2 "could not unpack ${tarball}"
  mv -- "${src}" "${dest}"
  check_exit_code 2 "could not rename ${src} to ${dest}"
}

# Configure, build and install the source tree in directory $1. The remaining
# arguments are the configure command line, run from inside that directory.
# Exits with status 2 through check_exit_code when any step fails.
_configure_make_install( )
{
  local src_dir=$1
  shift
  local start_dir=${PWD}

  cd "${src_dir}"
  check_exit_code 2 "could not enter ${src_dir}"
  "$@"
  check_exit_code 2 "configure failed in ${src_dir}"
  make -j"$(nproc)"
  check_exit_code 2 "make failed in ${src_dir}"
  make install
  check_exit_code 2 "make install failed in ${src_dir}"
  cd "${start_dir}"
  check_exit_code 2 "could not return to ${start_dir}"
}

#######################################
# Build and install UCX, then Open MPI on top of it.
# Globals:
#   ucx_prefix, ompi_prefix    (read) install prefixes
#   ucx_version, ompi_version  (read) release versions to download
#   with_rocm                  (read) value for --with-rocm
#   LD_LIBRARY_PATH, LIBRARY_PATH, CPATH  (written, exported) extended with
#                              the UCX install so the Open MPI build sees it
# Arguments:
#   None
# Outputs:
#   Build output and error messages on stdout/stderr.
# Returns:
#   0 on success. Exits with 2 when a build step or the final Open MPI check
#   fails, and with 3 when the UCX libraries are missing after the build.
#######################################
install_openmpi( )
{
  # Backup old installations if they exist (overwrite existing .bak if present)
  _backup_prefix "${ompi_prefix}"
  _backup_prefix "${ucx_prefix}"

  # Command lines are kept as arrays so every flag stays exactly one argument.
  local -a ucx_build_cmd=(
    ./contrib/configure-release "--prefix=${ucx_prefix}"
    --enable-cma --enable-mt
    --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm
    --with-verbs=/usr/include --with-rdmacm=/usr
    "--with-rocm=${with_rocm}"
    --without-knem --without-cuda --without-java
  )

  if [[ ! -d "./ucx" ]]; then
    _fetch_source ucx "ucx-${ucx_version}" \
      "https://github.com/openucx/ucx/releases/download/v${ucx_version}/ucx-${ucx_version}.tar.gz"
    _configure_make_install ucx "${ucx_build_cmd[@]}"
  elif ! _ucx_installed; then
    _configure_make_install ucx "${ucx_build_cmd[@]}"
  fi

  # Check for successful build
  if ! _ucx_installed; then
    echo "Error: UCX install unsuccessful."
    exit 3
  fi

  # ${VAR:+:${VAR}} appends the old value only when there is one. A bare
  # trailing ':' would add an empty path element, which these search paths
  # interpret as the current working directory.
  export LD_LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  export LIBRARY_PATH="${ucx_prefix}/lib:${ucx_prefix}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
  export CPATH="${ucx_prefix}/include${CPATH:+:${CPATH}}"

  # This flag list is now the one that is actually executed. Previously it was
  # assembled but never run: a separate hand-typed ./configure line was used
  # instead, which lacked --with-rocm, --enable-builtin-atomics and
  # --enable-wrapper-rpath and whose last flag was split in two by stray
  # whitespace ("--enable-mca-no-   build=btl-uct"). --disable-man-pages is
  # carried over from that hand-typed line.
  local -a ompi_build_cmd=(
    ./configure "--prefix=${ompi_prefix}"
    "--with-ucx=${ucx_prefix}"
    "--with-rocm=${with_rocm}"
    --enable-builtin-atomics
    --enable-wrapper-rpath
    --without-verbs --disable-man-pages --enable-mca-no-build=btl-uct
  )

  if [[ ! -d "./openmpi" ]]; then
    # ${ompi_version%.*} drops the patch level: 5.0.8 -> 5.0 (release series).
    _fetch_source openmpi "openmpi-${ompi_version}" \
      "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/openmpi-${ompi_version}.tar.gz"
    _configure_make_install openmpi "${ompi_build_cmd[@]}"
  elif ! _ompi_installed; then
    _configure_make_install openmpi "${ompi_build_cmd[@]}"
  fi

  # Check for successful build
  if ! _ompi_installed; then
    echo "Error: OpenMPI install unsuccessful."
    exit_with_error 2
  fi
}

# Script entry point: run the UCX + Open MPI installation.
install_openmpi