rochplmxp-bw.patch 9.57 KB
Newer Older
one's avatar
one committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91afcc4..b1c3ef6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(i ${rochplmxp_device_source})
 endforeach()
 
 # HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE --gpu-max-threads-per-block=1024")
 list(APPEND CMAKE_HOST_FLAGS "")
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -99,21 +99,51 @@ else()
   list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native;-Wno-deprecated-declarations")
 endif()
 
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
-  set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
-  set(TARGETS "${TARGETS};gfx90a")
+# GPU arch targets: an explicit HPL_BUILD_ARCH list wins, otherwise ask rocminfo
+set(ARCHS "")  # use plural to indicate list
+if(DEFINED HPL_BUILD_ARCH AND NOT "${HPL_BUILD_ARCH}" STREQUAL "")
+  string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+  list(TRANSFORM ARCHS STRIP)
+  list(REMOVE_DUPLICATES ARCHS)
+  message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+  message(STATUS "Detecting available architecture")
+  ############ Find using rocminfo #####################
+  find_program(ROCMINFO_EXECUTABLE rocminfo)
+  if(ROCMINFO_EXECUTABLE)
+    execute_process(
+      COMMAND ${ROCMINFO_EXECUTABLE}
+      OUTPUT_VARIABLE ROCMINFO_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # 1) Only match lines where the token follows "Name:"
+    string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+    # 2) Strip the leading "Name:   " to keep just gfx tokens
+    string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+    # 3) Remove duplicates
+    list(REMOVE_DUPLICATES ARCHS)
+  endif()
 endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
-  set(TARGETS "${TARGETS};gfx942")
+
+# Seed the offload targets from the selected architectures so the loop below
+# actually emits --offload-arch for them; gfx1201 is still added on HIP >= 7.0
+set(TARGETS "${ARCHS}")
+if (HIP_VERSION VERSION_GREATER_EQUAL "7.0")
+  list(APPEND TARGETS "gfx1201")
 endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
-  set(TARGETS "${TARGETS};gfx950")
+
+if("${ARCHS}" STREQUAL "")
+  message(FATAL_ERROR "No GPU architectures detected via rocminfo and HPL_BUILD_ARCH is not set. Use ./install.sh --arch=gfxXXX")
 endif()
 
+list(REMOVE_DUPLICATES TARGETS)
+message(STATUS "Building for GPU architecture: ${TARGETS}")
+
+# Generate HIP_HIPCC_FLAGS
 foreach(target ${TARGETS})
   list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
 endforeach()
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 164d06d..78cc857 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -109,7 +109,8 @@ if(NOT ROCM_FOUND)
   execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
                   WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
 
-  find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+  # find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+  set(CMAKE_MODULE_PATH "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake;${CMAKE_MODULE_PATH}")
 endif()
 
 include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index de72a20..6542e7f 100755
--- a/install.sh
+++ b/install.sh
@@ -18,6 +18,7 @@ function display_help()
   echo "    [--with-rocblas=<dir>] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
   echo "    [--with-rocsolver=<dir>] Path to rocSOLVER library (Default: /opt/rocm/rocsolver)"
   echo "    [--with-mpi=<dir>] Path to external MPI install (Default: clone+build OpenMPI)"
+  echo "    [--arch=<list>] Comma separated list of GPU architectures to build, e.g. gfx90a,gfx942 (Default: detect with rocminfo)"
   echo "    [--verbose-print] Verbose output during HPL setup (Default: true)"
   echo "    [--enable-tracing] Annotate profiler traces with rocTX markers (Default: false)"
   echo "    [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,7 +34,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -68,11 +69,11 @@ exit_with_error( )
         printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
         ;;
 
-      centos|rhel)
+      centos|rhel|kylin)
         printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
         ;;
 
-      fedora)
+      fedora|rocky)
         printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
         ;;
 
@@ -145,6 +146,11 @@ install_openmpi( )
     exit 3
   fi
 
+  UCX_ROOT="$(pwd)/tpl/ucx"  # \${VAR:+:...} below avoids a trailing ':' (an empty entry means the current directory)
+  export LD_LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+  export LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
+  export CPATH="${UCX_ROOT}/include${CPATH:+:${CPATH}}"
+
   if [ ! -d "./tpl/openmpi" ]; then
     mkdir -p tpl && cd tpl
     git clone --branch v5.0.7 --recursive https://github.com/open-mpi/ompi.git openmpi
@@ -225,7 +231,7 @@ detailed_timing=true
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ $? -eq 4 ]]; then
-  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
+  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,arch:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
 else
   echo "Need a new version of getopt"
   exit_with_error 1
@@ -262,6 +268,9 @@ while true; do
     --with-rocsolver)
         with_rocsolver=${2}
         shift 2 ;;
+    --arch)
+        arch=${2}
+        shift 2 ;;
     --verbose-print)
         verbose_print=${2}
         shift 2 ;;
@@ -335,6 +344,9 @@ pushd .
   if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
     cmake_common_options="${cmake_common_options} -DHPLMXP_TRACING=ON"
   fi
+  if [[ -n "${arch}" ]]; then
+    cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch}"
+  fi
   shopt -u nocasematch
 
   # Build library with AMD toolchain because of existence of device kernels
diff --git a/scripts/mpirun_rochplmxp.in b/scripts/mpirun_rochplmxp.in
index 5ad6166..89bb1ab 100755
--- a/scripts/mpirun_rochplmxp.in
+++ b/scripts/mpirun_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -98,6 +98,14 @@ filename=HPL-MxP.dat
 inputfile=false
 cmdrun=false
 
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ompi_prefix=$tpl_dir/openmpi
+ompi_lib_dir=$tpl_dir/openmpi/lib
+ucx_lib_dir=$tpl_dir/ucx/lib
+# Prepend the bundled libraries; the :+ expansion avoids a trailing ':' (an empty entry means the current directory)
+export LD_LIBRARY_PATH="$ompi_lib_dir:$ucx_lib_dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+[[ -d "$ompi_prefix" ]] && export OPAL_PREFIX="$ompi_prefix"  # only when a bundled Open MPI directory exists
+
 # #################################################
 # MPI Args
 # #################################################
@@ -113,7 +121,13 @@ if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
   ompi_info=$(dirname ${mpi_bin})/ompi_info
   if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
     # ucx-specific args
-    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
+    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct \
+              -x UCX_TLS=self,sm,rocm_ipc,rocm_copy,rc_mlx5 \
+              -x UCX_MEMTYPE_CACHE=n \
+              ${mpi_args}"
   fi
+  # --allow-run-as-root is an Open MPI option; keeping it in this branch means
+  # other MPI launchers are not handed a flag they reject
+  mpi_args="--allow-run-as-root ${mpi_args}"
 fi
 
diff --git a/scripts/run_rochplmxp.in b/scripts/run_rochplmxp.in
index 698d3c1..bf1a15a 100755
--- a/scripts/run_rochplmxp.in
+++ b/scripts/run_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -98,7 +98,9 @@ filename=HPL-MxP.dat
 inputfile=false
 cmdrun=false
 
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib  # the :+ expansion below avoids a trailing ':' (an empty entry means the current directory)
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
 
 # #################################################
 # Parameter parsing
diff --git a/src/hplmxp_ptest.cpp b/src/hplmxp_ptest.cpp
index 11d0f44..e8b1eee 100644
--- a/src/hplmxp_ptest.cpp
+++ b/src/hplmxp_ptest.cpp
@@ -211,7 +211,7 @@ void HPLMXP_ptest(HPLMXP_T_test& test,
                        ctime(&current_time_end));
       }
 #ifdef HPLMXP_PROGRESS_REPORT
-      printf("Final Score:    %7.4e GFLOPS \n", Gflops);
+      printf("Final Score:    %7.9e GFLOPS \n", Gflops);
 #endif
     }
 #ifdef HPLMXP_DETAILED_TIMING