rochpcg-install.patch 4.78 KB
Newer Older
one's avatar
one committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
diff --git a/.gitignore b/.gitignore
index 1300bd7..83490ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,7 @@ tags
 
 # build-in-source directory
 build
+deps
 
 # doc directory
 docBin
diff --git a/install.sh b/install.sh
index e2c3a80..8922489 100755
--- a/install.sh
+++ b/install.sh
@@ -17,7 +17,7 @@ function display_help()
   echo "    [-g|--debug] -DCMAKE_BUILD_TYPE=Debug (default: Release)"
   echo "    [-t|--test] build single GPU test"
   echo "    [--with-rocm=<dir>] Path to ROCm install (default: /opt/rocm)"
-  echo "    [--with-mpi=<dir>] Path to external MPI install (Default: clone+build OpenMPI v4.1.0 in deps/)"
+  echo "    [--with-mpi=<dir>] Path to external MPI install (Default: clone+build OpenMPI in deps/)"
   echo "    [--gpu-aware-mpi] MPI library supports GPU-aware communication (Default: false)"
   echo "    [--with-openmp] compile with OpenMP support (default: enabled)"
   echo "    [--with-memmgmt] compile with smart memory management (default: enabled)"
@@ -186,22 +186,76 @@ install_packages( )
 # Clone and build OpenMPI+UCX in rochpcg/openmpi
 install_openmpi( )
 {
-  if [ ! -d "./deps/ucx" ]; then
-    mkdir -p deps && cd deps
-    git clone --branch v1.13.1 https://github.com/openucx/ucx.git ucx
-    cd ucx; ./autogen.sh; ./autogen.sh #why do we have to run this twice?
-    mkdir build; cd build
-    ../contrib/configure-opt --prefix=${PWD}/../ --with-rocm=${with_rocm} --without-knem --without-cuda --without-java
-    make -j$(nproc); make install; cd ../../..
+  # Download, build and install UCX and OpenMPI under ${PWD}/deps, then export the search paths
+  # for both. Reads ${with_rocm}. Returns 1 if creating deps/, a download, an extraction or a build fails.
+  local install_dir="${PWD}/deps"
+  local ucx_prefix="${install_dir}/ucx"
+  local ompi_prefix="${install_dir}/openmpi"
+  local ucx_lib_folder="${ucx_prefix}/lib"
+  local ucx_lib64_folder="${ucx_prefix}/lib64"
+  local ompi_lib_folder="${ompi_prefix}/lib"
+  local ompi_lib64_folder="${ompi_prefix}/lib64"
+
+  local ucx_version=1.20.0
+  local ucx_src="${install_dir}/ucx-${ucx_version}"
+  local ucx_tarball="ucx-${ucx_version}.tar.gz"
+  local ompi_version=5.0.9
+  local ompi_src="${install_dir}/openmpi-${ompi_version}"
+  local ompi_tarball="openmpi-${ompi_version}.tar.gz"
+  # Create the dependency directory; everything below uses absolute paths, so no cd is needed here
+  mkdir -p "${install_dir}" || return 1
+
+  # Wipe any old UCX source tree, download the tarball only if it is not cached, then unpack it
+  rm -rf -- "${ucx_src:?}"
+  if [ ! -f "${install_dir}/${ucx_tarball}" ]; then
+      wget -P "${install_dir}" "https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}" || { rm -f -- "${install_dir}/${ucx_tarball}"; return 1; }
+  fi
+  tar -zxf "${install_dir}/${ucx_tarball}" -C "${install_dir}" || return 1
+  # Same for OpenMPI; ${ompi_version%.*} drops the patch level to form the release directory (v5.0)
+  rm -rf -- "${ompi_src:?}"
+  if [ ! -f "${install_dir}/${ompi_tarball}" ]; then
+      wget -P "${install_dir}" "https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}" || { rm -f -- "${install_dir}/${ompi_tarball}"; return 1; }
+  fi
+  tar -zxf "${install_dir}/${ompi_tarball}" -C "${install_dir}" || return 1
+
+  # Build UCX only if libucm.so is not installed yet; the subshell keeps the caller's directory intact
+  if [ ! -f "${ucx_lib_folder}/libucm.so" ] && [ ! -f "${ucx_lib64_folder}/libucm.so" ]; then
+    ( cd "${ucx_src}" &&
+      ./contrib/configure-release --prefix="${ucx_prefix}" \
+          --enable-optimizations --enable-tuning \
+          --enable-cma --enable-mt \
+          --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \
+          --with-verbs=/usr/include --with-rdmacm=/usr \
+          --with-rocm="${with_rocm}" \
+          --without-knem --without-cuda --without-java &&
+      make -j"$(nproc)" &&
+      make install
+    ) || return 1
   fi
 
-  if [ ! -d "./deps/openmpi" ]; then
-    mkdir -p deps && cd deps
-    git clone --branch v4.1.4 https://github.com/open-mpi/ompi.git openmpi
-    cd openmpi; ./autogen.pl; mkdir build; cd build
-    ../configure --prefix=${PWD}/../ --with-ucx=${PWD}/../../ucx --without-verbs
-    make -j$(nproc); make install; cd ../../..
+  export LD_LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+  export LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}${LIBRARY_PATH:+:${LIBRARY_PATH}}"
+  export CPATH="${ucx_prefix}/include${CPATH:+:${CPATH}}"
+  # Build OpenMPI only if libmpi.so is not installed yet; again in a subshell, aborting on the first failure
+  if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
+    ( cd "${ompi_src}" &&
+      ./configure --prefix="${ompi_prefix}" \
+        --with-ucx="${ucx_prefix}" \
+        --with-rocm="${with_rocm}" \
+        --enable-builtin-atomics \
+        --enable-wrapper-rpath \
+        --enable-mca-no-build=btl-uct &&
+      make -j"$(nproc)" &&
+      make install
+    ) || return 1
   fi
+
+  export LD_LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+  export LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}${LIBRARY_PATH:+:${LIBRARY_PATH}}"
+  export CPATH="${ompi_prefix}/include${CPATH:+:${CPATH}}"
+  export OPAL_PREFIX="${ompi_prefix}"
+  # All directory changes happened inside subshells, so the working directory is still the one we started in
+  return 0
 }
 
 # #################################################
@@ -396,7 +450,7 @@ pushd .
   fi
 
   # Build library with AMD toolchain because of existense of device kernels
-  ${cmake_executable} ${cmake_common_options} \
+  ${cmake_executable} --fresh ${cmake_common_options} \
     -DCPACK_SET_DESTDIR=OFF \
     -DCMAKE_INSTALL_PREFIX=${install_prefix} \
     -DCPACK_PACKAGING_INSTALL_PREFIX=${with_rocm} \