Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
073fee70
Commit
073fee70
authored
Feb 20, 2026
by
one
Browse files
Add rochpl-install.patch
parent
4bdccdbc
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
405 additions
and
0 deletions
+405
-0
projects/rocHPL/rochpl-install.patch
projects/rocHPL/rochpl-install.patch
+405
-0
No files found.
projects/rocHPL/rochpl-install.patch
0 → 100644
View file @
073fee70
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b80b24..563122a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,7 +83,7 @@
foreach(i ${rochpl_device_source})
endforeach()
# HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -Wno-deprecated-declarations -fPIE -fopenmp")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -Wno-deprecated-declarations -fPIE -fopenmp --gpu-max-threads-per-block=1024")
list(APPEND CMAKE_HOST_FLAGS "-Wno-deprecated-declarations")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -94,25 +94,46 @@
else()
list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native")
endif()
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
- set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
- set(TARGETS "${TARGETS};gfx90a")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
- set(TARGETS "${TARGETS};gfx942")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
- set(TARGETS "${TARGETS};gfx950;gfx1100")
+set(ARCHS "") # use plural to indicate list
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+ string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+ list(TRANSFORM ARCHS STRIP)
+ list(REMOVE_DUPLICATES ARCHS)
+ message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+ message(STATUS "Detecting available architecture")
+ ############ Find using rocminfo #####################
+ find_program(ROCMINFO_EXECUTABLE rocminfo)
+ if(ROCMINFO_EXECUTABLE)
+ execute_process(
+ COMMAND ${ROCMINFO_EXECUTABLE}
+ OUTPUT_VARIABLE ROCMINFO_OUTPUT
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+
+ # 1) Only match lines where the token follows "Name:"
+ string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+ # 2) Strip the leading "Name: " to keep just gfx tokens
+ string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+ # 3) Remove duplicates; ARCHS is now the final list of gfx targets
+ list(REMOVE_DUPLICATES ARCHS)
+ endif()
endif()
if (HIP_VERSION VERSION_GREATER_EQUAL "7.0")
set(TARGETS "${TARGETS};gfx1201")
endif()
-foreach(target ${TARGETS})
+if(ARCHS STREQUAL "")
+ message(FATAL_ERROR "No GPU architectures detected via rocminfo and no HPL_BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX or pass -DHPL_BUILD_ARCH=gfxXXX to cmake")
+endif()
+
+message(STATUS "Building for GPU architecture: ${ARCHS}")
+
+# Generate HIP_HIPCC_FLAGS
+foreach(target ${ARCHS})
list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
endforeach()
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 6d6be5d..d11c01a 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -83,25 +83,33 @@
find_package(ROCmCMakeBuildTools QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH})
if(NOT ROCM_FOUND)
set(PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern)
set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
- file(DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip
- ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip STATUS status LOG log)
-
- list(GET status 0 status_code)
- list(GET status 1 status_string)
-
- if(NOT status_code EQUAL 0)
- message(FATAL_ERROR "error: downloading
- 'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
- status_code: ${status_code}
- status_string: ${status_string}
- log: ${log}
- ")
+ set(rocm_cmake_zip ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip)
+
+ # Check if zip file already exists to skip download
+ if(EXISTS "${rocm_cmake_zip}")
+ message(STATUS "Using existing rocm-cmake zip file: ${rocm_cmake_zip}")
+ else()
+ file(DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip
+ ${rocm_cmake_zip} STATUS status LOG log)
+
+ list(GET status 0 status_code)
+ list(GET status 1 status_string)
+
+ if(NOT status_code EQUAL 0)
+ message(FATAL_ERROR "error: downloading
+ 'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
+ status_code: ${status_code}
+ status_string: ${status_string}
+ log: ${log}
+ ")
+ endif()
endif()
- execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
+ execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${rocm_cmake_zip}
WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
- find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+ # Use the extracted rocm-cmake modules straight from the source tree: prepend them to the module search path
+ set(CMAKE_MODULE_PATH "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake;${CMAKE_MODULE_PATH}")
endif()
include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index b30a3fb..4d3284b 100755
--- a/install.sh
+++ b/install.sh
@@ -2,7 +2,7 @@
# Author: Nico Trost
# Modified by: Noel Chalmers
-#set -x #echo on
+# set -euo pipefail
# #################################################
# helper functions
@@ -17,6 +17,7 @@
function display_help()
echo " [--with-rocm=<dir>] Path to ROCm install (Default: /opt/rocm)"
echo " [--with-rocblas=<dir>] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
echo " [--with-mpi=<dir>] Path to external MPI install (Default: clone+build OpenMPI)"
+ echo " [--arch=<list>] Comma separated GPU architecture list to build, e.g. gfx90a,gfx942 (Default: detect via rocminfo)"
echo " [--with-mpi-gtl=<dir>] Path to external MPI-GTL install (Optional: defaults to no gtl support)"
echo " [--verbose-print] Verbose output during HPL setup (Default: true)"
echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,7 +34,7 @@
supported_distro( )
fi
case "${ID}" in
- debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+ debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
true
;;
*) printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -68,11 +69,11 @@
exit_with_error( )
printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
;;
- centos|rhel|tencentos)
+ centos|rhel|tencentos|kylin)
printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
;;
- fedora)
+ fedora|rocky)
printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
;;
@@ -105,42 +106,63 @@
check_exit_code( )
# Clone and build OpenMPI+UCX in rochpl/tpl
install_openmpi( )
{
+ local install_dir=${PWD}/tpl
+ local ucx_prefix=${install_dir}/ucx
+ local ompi_prefix=${install_dir}/openmpi
+
#OpenMPI and UCX install to one of these locations depending on OS
- ucx_lib_folder=./tpl/ucx/lib
- ompi_lib_folder=./tpl/openmpi/lib
- ucx_lib64_folder=./tpl/ucx/lib64
- ompi_lib64_folder=./tpl/openmpi/lib64
-
- if [ ! -d "./tpl/ucx" ]; then
- mkdir -p tpl && cd tpl
- git clone --branch v1.18.0 https://github.com/openucx/ucx.git ucx
- check_exit_code 2
- cd ucx;
- ./autogen.sh; ./autogen.sh #why do we have to run this twice?
+ local ucx_lib_folder=${ucx_prefix}/lib
+ local ompi_lib_folder=${ompi_prefix}/lib
+ local ucx_lib64_folder=${ucx_prefix}/lib64
+ local ompi_lib64_folder=${ompi_prefix}/lib64
+
+ # Create the tpl directory
+ mkdir -p ${install_dir} && cd ${install_dir}
+
+ local ucx_version=1.20.0
+ local ucx_src=${install_dir}/ucx-${ucx_version}
+ local ucx_tarball=ucx-${ucx_version}.tar.gz
+ local ompi_version=5.0.9
+ local ompi_src=${install_dir}/openmpi-${ompi_version}
+ local ompi_tarball=openmpi-${ompi_version}.tar.gz
+
+ # Download UCX on demand
+ if [ ! -d "${ucx_src}" ]; then
+ if [ ! -f "${ucx_tarball}" ]; then
+ wget https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}
+ fi
check_exit_code 2
- mkdir build; cd build
- ../contrib/configure-opt --prefix=${PWD}/../ --with-rocm=${with_rocm} --without-knem --without-cuda --without-java
+ tar -zxf ${ucx_tarball}
check_exit_code 2
- make -j$(nproc)
- check_exit_code 2
- make install
+ fi
+ # Download OpenMPI on demand
+ if [ ! -d "${ompi_src}" ]; then
+ if [ ! -f "${ompi_tarball}" ]; then
+ wget https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}
+ fi
check_exit_code 2
- cd ../../..
- elif ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
- [ ! -f "${ucx_lib_folder}/libucs.so" ] || [ ! -f "${ucx_lib_folder}/libuct.so" ]) && \
- ([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
- [ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
- cd tpl/ucx;
- ./autogen.sh; ./autogen.sh
+ tar -zxf ${ompi_tarball}
check_exit_code 2
- mkdir build; cd build
- ../contrib/configure-opt --prefix=${PWD}/../ --with-rocm=${with_rocm} --without-knem --without-cuda --without-java
+ fi
+
+ # Build UCX on demand
+ if ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
+ [ ! -f "${ucx_lib_folder}/libucs.so" ] || [ ! -f "${ucx_lib_folder}/libuct.so" ]) && \
+ ([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
+ [ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
+ cd ${ucx_src};
+ ./contrib/configure-release --prefix=${ucx_prefix} \
+ --enable-cma --enable-mt \
+ --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \
+ --with-verbs=/usr/include --with-rdmacm=/usr \
+ --with-rocm=${with_rocm} \
+ --without-knem --without-cuda --without-java
check_exit_code 2
make -j$(nproc)
check_exit_code 2
make install
check_exit_code 2
- cd ../../..
+ cd ${install_dir}/..
fi
# Check for successful build
@@ -152,31 +174,26 @@
install_openmpi( )
exit 3
fi
- if [ ! -d "./tpl/openmpi" ]; then
- mkdir -p tpl && cd tpl
- git clone --branch v5.0.7 --recursive https://github.com/open-mpi/ompi.git openmpi
- check_exit_code 2
- cd openmpi; ./autogen.pl;
- check_exit_code 2
- mkdir build; cd build
- ../configure --prefix=${PWD}/../ --with-ucx=${PWD}/../../ucx --without-verbs --disable-man-pages --enable-mca-no-build=btl-uct
- check_exit_code 2
- make -j$(nproc)
- check_exit_code 2
- make install
- check_exit_code 2
- cd ../../..
- elif [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
- cd tpl/openmpi; ./autogen.pl;
- check_exit_code 2
- mkdir build; cd build
- ../configure --prefix=${PWD}/../ --with-ucx=${PWD}/../../ucx --without-verbs --disable-man-pages --enable-mca-no-build=btl-uct
+ export LD_LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}:${LD_LIBRARY_PATH}"
+ export LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}:${LIBRARY_PATH}"
+ export CPATH="${ucx_prefix}/include:${CPATH}"
+
+ # Build OpenMPI on demand
+ if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
+ cd ${ompi_src}
+ ./configure --prefix=${ompi_prefix} \
+ --with-ucx=${ucx_prefix} \
+ --with-rocm=${with_rocm} \
+ --disable-man-pages \
+ --enable-builtin-atomics \
+ --enable-wrapper-rpath \
+ --without-verbs --enable-mca-no-build=btl-uct
check_exit_code 2
make -j$(nproc)
check_exit_code 2
make install
check_exit_code 2
- cd ../../..
+ cd ${install_dir}/..
fi
# Check for successful build
@@ -184,6 +201,11 @@
install_openmpi( )
echo "Error: OpenMPI install unsuccessful."
exit_with_error 2
fi
+
+ export LD_LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}:${LD_LIBRARY_PATH}"
+ export LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}:${LIBRARY_PATH}"
+ export CPATH="${ompi_prefix}/include:${CPATH}"
+ export OPAL_PREFIX=${ompi_prefix}
}
# #################################################
@@ -232,7 +254,7 @@
enable_tracing=false
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
- GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
+ GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,arch:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
else
echo "Need a new version of getopt"
exit_with_error 1
@@ -263,6 +285,9 @@
while true; do
--with-mpi)
with_mpi=${2}
shift 2 ;;
+ --arch)
+ arch=${2}
+ shift 2 ;;
--with-mpi-gtl)
with_mpi_gtl=${2}
shift 2 ;;
@@ -294,9 +319,6 @@
printf "\033[32mCreating project build directory in: \033[33m${build_dir}\033[0m
# #################################################
# prep
# #################################################
-# ensure a clean build environment
-rm -rf ${build_dir}
-
# Default cmake executable is called cmake
cmake_executable=cmake
@@ -311,7 +333,7 @@
pushd .
# #################################################
if [[ "${with_mpi}" == tpl/openmpi ]]; then
- with_mpi=${PWD}/tpl/openmpi
+ with_mpi="${PWD}/tpl/openmpi" # ompi_prefix is local to install_openmpi and unset here; this is the same path it installs to
install_openmpi
fi
@@ -347,11 +369,14 @@
pushd .
if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
cmake_common_options="${cmake_common_options} -DHPL_TRACING=ON"
fi
+ if [[ -n "${arch}" ]]; then
+ cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch}"
+ fi
shopt -u nocasematch
# Build library with AMD toolchain because of existence of device kernels
mkdir -p ${build_dir} && cd ${build_dir}
- ${cmake_executable} ${cmake_common_options} ..
+ ${cmake_executable} --fresh ${cmake_common_options} ..
check_exit_code 2
if [[ -e build.ninja ]]; then
diff --git a/src/HPL_pdtest.cpp b/src/HPL_pdtest.cpp
index 94a0d3f..3135763 100644
--- a/src/HPL_pdtest.cpp
+++ b/src/HPL_pdtest.cpp
@@ -212,7 +212,7 @@
void HPL_pdtest(HPL_T_test* TEST,
ctime(&current_time_end));
}
#ifdef HPL_PROGRESS_REPORT
- printf("Final Score: %7.4e GFLOPS \n", Gflops);
+ printf("Final Score: %7.9e GFLOPS \n", Gflops);
#endif
}
#ifdef HPL_DETAILED_TIMING
diff --git a/src/pgesv/HPL_pdgesv.cpp b/src/pgesv/HPL_pdgesv.cpp
index d6c99c3..280a9a5 100644
--- a/src/pgesv/HPL_pdgesv.cpp
+++ b/src/pgesv/HPL_pdgesv.cpp
@@ -336,7 +336,7 @@
void HPL_pdgesv(HPL_T_grid* GRID, HPL_T_palg* ALGO, HPL_T_pmat* A) {
printf(" %9.3e |", step_gflops);
#endif
- printf(" %9.3e \n", gflops);
+ printf(" %9.9e \n", gflops);
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment