Patch summary (text before the first "diff --git" header is ignored by patch tools).

  CMakeLists.txt            Build for the GPU architectures given in HPL_BUILD_ARCH
                            (comma separated), or for those reported by rocminfo.
  cmake/Dependencies.cmake  Reuse an already downloaded rocm-cmake archive.
  install.sh                Add --arch, build UCX/OpenMPI from release tarballs,
                            stop wiping the build directory.
  src/*.cpp                 Print the GFLOPS figures with more digits.

Review fixes folded into this revision:
  - install.sh no longer assigns the function-local ompi_prefix to with_mpi.
  - install_openmpi always returns to the directory above tpl/.
  - OpenMPI lib64 is exported alongside lib, matching the UCX exports.
  - A failed rocm-cmake download removes the partial archive.
  - The unused ARCH loop in CMakeLists.txt is removed.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy; apply with whitespace-tolerant options or regenerate
the patch against the real tree. The "index" lines are those of the original patch.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b80b24..563122a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,7 +83,7 @@ foreach(i ${rochpl_device_source})
 endforeach()
 
 # HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -Wno-deprecated-declarations -fPIE -fopenmp")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -Wno-deprecated-declarations -fPIE -fopenmp --gpu-max-threads-per-block=1024")
 list(APPEND CMAKE_HOST_FLAGS "-Wno-deprecated-declarations")
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -94,25 +94,46 @@ else()
   list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native")
 endif()
 
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
-  set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
-  set(TARGETS "${TARGETS};gfx90a")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
-  set(TARGETS "${TARGETS};gfx942")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
-  set(TARGETS "${TARGETS};gfx950;gfx1100")
+set(ARCHS "") # use plural to indicate list
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+  string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+  list(TRANSFORM ARCHS STRIP)
+  list(REMOVE_DUPLICATES ARCHS)
+  message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+  message(STATUS "Detecting available architecture")
+  ############ Find using rocminfo #####################
+  find_program(ROCMINFO_EXECUTABLE rocminfo)
+  if(ROCMINFO_EXECUTABLE)
+    execute_process(
+      COMMAND ${ROCMINFO_EXECUTABLE}
+      OUTPUT_VARIABLE ROCMINFO_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # 1) Only match lines where the token follows "Name:"
+    string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+    # 2) Strip the leading "Name: " to keep just gfx tokens
+    string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+    # 3) Remove duplicates
+    list(REMOVE_DUPLICATES ARCHS)
+  endif()
 endif()
 if (HIP_VERSION VERSION_GREATER_EQUAL "7.0")
   set(TARGETS "${TARGETS};gfx1201")
 endif()
 
-foreach(target ${TARGETS})
+if("${ARCHS}" STREQUAL "")
+  message(FATAL_ERROR "No GPU architectures detected via rocminfo and no HPL_BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX")
+endif()
+
+message(STATUS "Building for GPU architecture: ${ARCHS}")
+
+# Generate HIP_HIPCC_FLAGS
+foreach(target ${ARCHS})
   list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
 endforeach()
 
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 6d6be5d..d11c01a 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -83,25 +83,35 @@ find_package(ROCmCMakeBuildTools QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH})
 if(NOT ROCM_FOUND)
   set(PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern)
   set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
-  file(DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip
-       ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip STATUS status LOG log)
-
-  list(GET status 0 status_code)
-  list(GET status 1 status_string)
-
-  if(NOT status_code EQUAL 0)
-    message(FATAL_ERROR "error: downloading
-    'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
-    status_code: ${status_code}
-    status_string: ${status_string}
-    log: ${log}
-    ")
+  set(rocm_cmake_zip ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip)
+
+  # Check if zip file already exists to skip download
+  if(EXISTS "${rocm_cmake_zip}")
+    message(STATUS "Using existing rocm-cmake zip file: ${rocm_cmake_zip}")
+  else()
+    file(DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip
+         ${rocm_cmake_zip} STATUS status LOG log)
+
+    list(GET status 0 status_code)
+    list(GET status 1 status_string)
+
+    if(NOT status_code EQUAL 0)
+      # Drop the partial file so the EXISTS shortcut above does not reuse it
+      file(REMOVE "${rocm_cmake_zip}")
+      message(FATAL_ERROR "error: downloading
+      'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
+      status_code: ${status_code}
+      status_string: ${status_string}
+      log: ${log}
+      ")
+    endif()
   endif()
 
-  execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
+  execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${rocm_cmake_zip}
                   WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
 
-  find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+  # Put the unpacked rocm-cmake modules on the module path for the include() calls below
+  set(CMAKE_MODULE_PATH "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake;${CMAKE_MODULE_PATH}")
 endif()
 
 include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index b30a3fb..4d3284b 100755
--- a/install.sh
+++ b/install.sh
@@ -2,7 +2,7 @@
 # Author: Nico Trost
 # Modified by: Noel Chalmers
 
-#set -x #echo on
+# set -euo pipefail
 
 # #################################################
 # helper functions
@@ -17,6 +17,7 @@ function display_help()
   echo " [--with-rocm=] Path to ROCm install (Default: /opt/rocm)"
   echo " [--with-rocblas=] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
   echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)"
+  echo " [--arch=<list>] Specify comma separated architecture list to build (Default: detect from rocminfo)"
   echo " [--with-mpi-gtl=] Path to external MPI-GTL install (Optional: defaults to no gtl support)"
   echo " [--verbose-print] Verbose output during HPL setup (Default: true)"
   echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,7 +34,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos)
+    debian|linuxmint|ubuntu|centos|rhel|fedora|sles|tencentos|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Debian, Linuxmint, Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -68,11 +69,11 @@ exit_with_error( )
         printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
         ;;
 
-      centos|rhel|tencentos)
+      centos|rhel|tencentos|kylin)
         printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
         ;;
 
-      fedora)
+      fedora|rocky)
         printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
         ;;
 
@@ -105,42 +106,63 @@ check_exit_code( )
 # Clone and build OpenMPI+UCX in rochpl/tpl
 install_openmpi( )
 {
+  local install_dir=${PWD}/tpl
+  local ucx_prefix=${install_dir}/ucx
+  local ompi_prefix=${install_dir}/openmpi
+
   #OpenMPI and UCX install to one of these locations depending on OS
-  ucx_lib_folder=./tpl/ucx/lib
-  ompi_lib_folder=./tpl/openmpi/lib
-  ucx_lib64_folder=./tpl/ucx/lib64
-  ompi_lib64_folder=./tpl/openmpi/lib64
-
-  if [ ! -d "./tpl/ucx" ]; then
-    mkdir -p tpl && cd tpl
-    git clone --branch v1.18.0 https://github.com/openucx/ucx.git ucx
-    check_exit_code 2
-    cd ucx;
-    ./autogen.sh; ./autogen.sh #why do we have to run this twice?
+  local ucx_lib_folder=${ucx_prefix}/lib
+  local ompi_lib_folder=${ompi_prefix}/lib
+  local ucx_lib64_folder=${ucx_prefix}/lib64
+  local ompi_lib64_folder=${ompi_prefix}/lib64
+
+  # Create the tpl directory
+  mkdir -p ${install_dir} && cd ${install_dir}
+
+  local ucx_version=1.20.0
+  local ucx_src=${install_dir}/ucx-${ucx_version}
+  local ucx_tarball=ucx-${ucx_version}.tar.gz
+  local ompi_version=5.0.9
+  local ompi_src=${install_dir}/openmpi-${ompi_version}
+  local ompi_tarball=openmpi-${ompi_version}.tar.gz
+
+  # Download UCX on demand
+  if [ ! -d "${ucx_src}" ]; then
+    if [ ! -f "${ucx_tarball}" ]; then
+      wget https://github.com/openucx/ucx/releases/download/v${ucx_version}/${ucx_tarball}
+    fi
     check_exit_code 2
-    mkdir build; cd build
-    ../contrib/configure-opt --prefix=${PWD}/../ --with-rocm=${with_rocm} --without-knem --without-cuda --without-java
+    tar -zxf ${ucx_tarball}
     check_exit_code 2
-    make -j$(nproc)
-    check_exit_code 2
-    make install
+  fi
+  # Download OpenMPI on demand
+  if [ ! -d "${ompi_src}" ]; then
+    if [ ! -f "${ompi_tarball}" ]; then
+      wget https://download.open-mpi.org/release/open-mpi/v${ompi_version%.*}/${ompi_tarball}
+    fi
     check_exit_code 2
-    cd ../../..
-  elif ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
-        [ ! -f "${ucx_lib_folder}/libucs.so" ] || [ ! -f "${ucx_lib_folder}/libuct.so" ]) && \
-       ([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
-        [ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
-    cd tpl/ucx;
-    ./autogen.sh; ./autogen.sh
+    tar -zxf ${ompi_tarball}
     check_exit_code 2
-    mkdir build; cd build
-    ../contrib/configure-opt --prefix=${PWD}/../ --with-rocm=${with_rocm} --without-knem --without-cuda --without-java
+  fi
+
+  # Build UCX on demand
+  if ([ ! -f "${ucx_lib_folder}/libucm.so" ] || [ ! -f "${ucx_lib_folder}/libucp.so" ] || \
+      [ ! -f "${ucx_lib_folder}/libucs.so" ] || [ ! -f "${ucx_lib_folder}/libuct.so" ]) && \
+     ([ ! -f "${ucx_lib64_folder}/libucm.so" ] || [ ! -f "${ucx_lib64_folder}/libucp.so" ] || \
+      [ ! -f "${ucx_lib64_folder}/libucs.so" ] || [ ! -f "${ucx_lib64_folder}/libuct.so" ]); then
+    cd ${ucx_src};
+    ./contrib/configure-release --prefix=${ucx_prefix} \
+        --enable-cma --enable-mt \
+        --with-mlx5 --with-rc --with-ud --with-dc --with-dm --with-ib_hw_tm \
+        --with-verbs=/usr/include --with-rdmacm=/usr \
+        --with-rocm=${with_rocm} \
+        --without-knem --without-cuda --without-java
     check_exit_code 2
     make -j$(nproc)
     check_exit_code 2
     make install
     check_exit_code 2
-    cd ../../..
+    cd ${install_dir}/..
   fi
 
   # Check for successful build
@@ -152,31 +174,26 @@ install_openmpi( )
     exit 3
   fi
 
-  if [ ! -d "./tpl/openmpi" ]; then
-    mkdir -p tpl && cd tpl
-    git clone --branch v5.0.7 --recursive https://github.com/open-mpi/ompi.git openmpi
-    check_exit_code 2
-    cd openmpi; ./autogen.pl;
-    check_exit_code 2
-    mkdir build; cd build
-    ../configure --prefix=${PWD}/../ --with-ucx=${PWD}/../../ucx --without-verbs --disable-man-pages --enable-mca-no-build=btl-uct
-    check_exit_code 2
-    make -j$(nproc)
-    check_exit_code 2
-    make install
-    check_exit_code 2
-    cd ../../..
-  elif [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
-    cd tpl/openmpi; ./autogen.pl;
-    check_exit_code 2
-    mkdir build; cd build
-    ../configure --prefix=${PWD}/../ --with-ucx=${PWD}/../../ucx --without-verbs --disable-man-pages --enable-mca-no-build=btl-uct
+  export LD_LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}:${LD_LIBRARY_PATH}"
+  export LIBRARY_PATH="${ucx_lib_folder}:${ucx_lib64_folder}:${LIBRARY_PATH}"
+  export CPATH="${ucx_prefix}/include:${CPATH}"
+
+  # Build OpenMPI on demand
+  if [ ! -f "${ompi_lib_folder}/libmpi.so" ] && [ ! -f "${ompi_lib64_folder}/libmpi.so" ]; then
+    cd ${ompi_src}
+    ./configure --prefix=${ompi_prefix} \
+        --with-ucx=${ucx_prefix} \
+        --with-rocm=${with_rocm} \
+        --disable-man-pages \
+        --enable-builtin-atomics \
+        --enable-wrapper-rpath \
+        --without-verbs --enable-mca-no-build=btl-uct
     check_exit_code 2
     make -j$(nproc)
     check_exit_code 2
     make install
     check_exit_code 2
-    cd ../../..
+    cd ${install_dir}/..
   fi
 
   # Check for successful build
@@ -184,6 +201,15 @@ install_openmpi( )
     echo "Error: OpenMPI install unsuccessful."
     exit_with_error 2
   fi
+
+  export LD_LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}:${LD_LIBRARY_PATH}"
+  export LIBRARY_PATH="${ompi_lib_folder}:${ompi_lib64_folder}:${LIBRARY_PATH}"
+  export CPATH="${ompi_prefix}/include:${CPATH}"
+  export OPAL_PREFIX=${ompi_prefix}
+
+  # Always return to the directory above tpl/: the build branches above only
+  # do so when UCX or OpenMPI was actually rebuilt.
+  cd ${install_dir}/..
 }
 
 # #################################################
@@ -232,7 +258,7 @@ enable_tracing=false
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ $? -eq 4 ]]; then
-  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
+  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,arch:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
 else
   echo "Need a new version of getopt"
   exit_with_error 1
@@ -263,6 +289,9 @@ while true; do
     --with-mpi)
         with_mpi=${2}
         shift 2 ;;
+    --arch)
+        arch=${2}
+        shift 2 ;;
     --with-mpi-gtl)
         with_mpi_gtl=${2}
         shift 2 ;;
@@ -294,9 +323,6 @@ printf "\033[32mCreating project build directory in: \033[33m${build_dir}\033[0m
 # #################################################
 # prep
 # #################################################
-# ensure a clean build environment
-rm -rf ${build_dir}
-
 # Default cmake executable is called cmake
 cmake_executable=cmake
 
@@ -347,11 +373,14 @@ pushd .
   if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
     cmake_common_options="${cmake_common_options} -DHPL_TRACING=ON"
   fi
+  if [[ -n "${arch}" ]]; then
+    cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch// /}"
+  fi
   shopt -u nocasematch
 
   # Build library with AMD toolchain because of existence of device kernels
   mkdir -p ${build_dir} && cd ${build_dir}
-  ${cmake_executable} ${cmake_common_options} ..
+  ${cmake_executable} --fresh ${cmake_common_options} ..
   check_exit_code 2
 
   if [[ -e build.ninja ]]; then
diff --git a/src/HPL_pdtest.cpp b/src/HPL_pdtest.cpp
index 94a0d3f..3135763 100644
--- a/src/HPL_pdtest.cpp
+++ b/src/HPL_pdtest.cpp
@@ -212,7 +212,7 @@ void HPL_pdtest(HPL_T_test* TEST,
               ctime(&current_time_end));
     }
 #ifdef HPL_PROGRESS_REPORT
-    printf("Final Score: %7.4e GFLOPS \n", Gflops);
+    printf("Final Score: %7.9e GFLOPS \n", Gflops);
 #endif
   }
 #ifdef HPL_DETAILED_TIMING
diff --git a/src/pgesv/HPL_pdgesv.cpp b/src/pgesv/HPL_pdgesv.cpp
index d6c99c3..280a9a5 100644
--- a/src/pgesv/HPL_pdgesv.cpp
+++ b/src/pgesv/HPL_pdgesv.cpp
@@ -336,7 +336,7 @@ void HPL_pdgesv(HPL_T_grid* GRID, HPL_T_palg* ALGO, HPL_T_pmat* A) {
       printf(" %9.3e |", step_gflops);
 #endif
 
-      printf(" %9.3e \n", gflops);
+      printf(" %9.9e \n", gflops);
     }
 #endif
 