rochplmxp_dtk26.patch 7.21 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91afcc4..6331291 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(i ${rochplmxp_device_source})
 endforeach()
 
 # HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE --gpu-max-threads-per-block=1024")
 list(APPEND CMAKE_HOST_FLAGS "")
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -100,21 +100,35 @@ else()
 endif()
 
 # GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
-  set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
-  set(TARGETS "${TARGETS};gfx90a")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
-  set(TARGETS "${TARGETS};gfx942")
+set(ARCHS "")
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+  string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+  list(TRANSFORM ARCHS STRIP)
+  list(REMOVE_DUPLICATES ARCHS)
+  message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+  message(STATUS "Detecting available architecture")
+  find_program(ROCMINFO_EXECUTABLE rocminfo)
+  if(ROCMINFO_EXECUTABLE)
+    execute_process(
+      COMMAND ${ROCMINFO_EXECUTABLE}
+      OUTPUT_VARIABLE ROCMINFO_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+    string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+    list(REMOVE_DUPLICATES ARCHS)
+  endif()
 endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
-  set(TARGETS "${TARGETS};gfx950")
+
+if("${ARCHS}" STREQUAL "")  # neither HPL_BUILD_ARCH nor rocminfo yielded a target
+  message(FATAL_ERROR "No GPU architectures detected via rocminfo and no HPL_BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX or pass -DHPL_BUILD_ARCH=gfxXXX")
 endif()
 
-foreach(target ${TARGETS})
+message(STATUS "Building for GPU architecture: ${ARCHS}")
+
+foreach(target ${ARCHS})
   list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
 endforeach()
 
@@ -173,7 +187,7 @@ set_target_properties(rochplmxp PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BIN
 
 set_target_properties(rochplmxp PROPERTIES LINKER_LANGUAGE CXX)
 
-set_target_properties(rochplmxp PROPERTIES HIP_ARCHITECTURES "${DEFAULT_AMDGPU_TARGETS}")
+set_target_properties(rochplmxp PROPERTIES HIP_ARCHITECTURES "${ARCHS}")
 
 # # Configure a header file to pass the rocHPL-MxP version
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/include/hplmxp_version.hpp.in"
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 164d06d..041a8e2 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -109,7 +109,7 @@ if(NOT ROCM_FOUND)
   execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
                   WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
 
-  find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+  set(CMAKE_MODULE_PATH "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake;${CMAKE_MODULE_PATH}")
 endif()
 
 include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index de72a20..6f2ef05 100755
--- a/install.sh
+++ b/install.sh
@@ -18,6 +18,7 @@ function display_help()
   echo "    [--with-rocblas=<dir>] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
   echo "    [--with-rocsolver=<dir>] Path to rocSOLVER library (Default: /opt/rocm/rocsolver)"
   echo "    [--with-mpi=<dir>] Path to external MPI install (Default: clone+build OpenMPI)"
+  echo "    [--arch=<archs>] Specify comma separated architecture list to build (Default: detect from rocminfo)"
   echo "    [--verbose-print] Verbose output during HPL setup (Default: true)"
   echo "    [--enable-tracing] Annotate profiler traces with rocTX markers (Default: false)"
   echo "    [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,10 +34,10 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
-    *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
+    *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora, SLES, Kylin and Rocky\n"
         exit 2
         ;;
   esac
@@ -68,11 +69,11 @@ exit_with_error( )
         printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
         ;;
 
-      centos|rhel)
+      centos|rhel|kylin)
         printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
         ;;
 
-      fedora)
+      fedora|rocky)
         printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
         ;;
 
@@ -217,6 +218,7 @@ verbose_print=true
 enable_tracing=false
 progress_report=true
 detailed_timing=true
+arch=
 
 # #################################################
 # Parameter parsing
@@ -225,7 +227,7 @@ detailed_timing=true
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ $? -eq 4 ]]; then
-  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
+  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,verbose-print:,enable-tracing:,progress-report:,detailed-timing:,arch: --options hg -- "$@")
 else
   echo "Need a new version of getopt"
   exit_with_error 1
@@ -262,6 +264,9 @@ while true; do
     --with-rocsolver)
         with_rocsolver=${2}
         shift 2 ;;
+    --arch)
+        arch=${2}
+        shift 2 ;;
     --verbose-print)
         verbose_print=${2}
         shift 2 ;;
@@ -335,11 +340,14 @@ pushd .
   if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
     cmake_common_options="${cmake_common_options} -DHPLMXP_TRACING=ON"
   fi
+  if [[ -n "${arch}" ]]; then
+    cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch}"
+  fi
   shopt -u nocasematch
 
   # Build library with AMD toolchain because of existence of device kernels
   mkdir -p ${build_dir} && cd ${build_dir}
-  ${cmake_executable} ${cmake_common_options} ..
+  rm -rf ./CMakeCache.txt ./CMakeFiles && ${cmake_executable} ${cmake_common_options} .. # portable stand-in for 'cmake --fresh', which needs CMake >= 3.24
   check_exit_code 2
 
   make -j$(nproc) install
diff --git a/src/hplmxp_ptest.cpp b/src/hplmxp_ptest.cpp
index 11d0f44..e8b1eee 100644
--- a/src/hplmxp_ptest.cpp
+++ b/src/hplmxp_ptest.cpp
@@ -211,7 +211,7 @@ void HPLMXP_ptest(HPLMXP_T_test& test,
                        ctime(&current_time_end));
       }
 #ifdef HPLMXP_PROGRESS_REPORT
-      printf("Final Score:    %7.4e GFLOPS \n", Gflops);
+      printf("Final Score:    %7.9e GFLOPS \n", Gflops);
 #endif
     }
 #ifdef HPLMXP_DETAILED_TIMING
diff --git a/src/pgesv/hplmxp_pgetrf.cpp b/src/pgesv/hplmxp_pgetrf.cpp
index ccbd4c0..0230b44 100644
--- a/src/pgesv/hplmxp_pgetrf.cpp
+++ b/src/pgesv/hplmxp_pgetrf.cpp
@@ -420,7 +420,7 @@ void HPLMXP_pgetrf(HPLMXP_T_grid&         grid,
       printf("  %9.3e  |", step_gflops);
 #endif
 
-      printf("    %9.3e   \n", gflops);
+      printf("    %9.9e   \n", gflops);
     }
 #endif