# Review notes (patch tools skip text that precedes the first "diff --git" header).
#
# What this patch does:
#   * CMakeLists.txt: GPU targets come from HPL_BUILD_ARCH (comma- or semicolon-separated)
#     or, when that is empty, from the gfx names printed by rocminfo; one --offload-arch
#     flag is emitted per entry. Also adds --gpu-max-threads-per-block=1024.
#   * cmake/Dependencies.cmake: the unpacked rocm-cmake modules are reached through
#     CMAKE_MODULE_PATH instead of find_package().
#   * install.sh: new --arch option, kylin/rocky distro ids, UCX search paths for the
#     OpenMPI build.
#   * scripts/*.in: kylin/rocky distro ids, bundled OpenMPI/UCX library paths, extra UCX
#     settings, --allow-run-as-root for Open MPI when started as root.
#   * src/hplmxp_ptest.cpp: more digits in the "Final Score" line.
#
# Caveats:
#   * Line breaks, blank lines and indentation were rebuilt from a whitespace-collapsed
#     copy. Leading whitespace on context lines and the column padding inside help
#     strings are best-effort, so a whitespace-tolerant apply may be required.
#   * "index" lines are carried over verbatim; they no longer describe the revised
#     post-images.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91afcc4..b1c3ef6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(i ${rochplmxp_device_source})
 endforeach()
 
 # HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE --gpu-max-threads-per-block=1024")
 list(APPEND CMAKE_HOST_FLAGS "")
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -99,21 +99,44 @@ else()
   list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native;-Wno-deprecated-declarations")
 endif()
 
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
-  set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
-  set(TARGETS "${TARGETS};gfx90a")
+# GPU arch targets: an explicit HPL_BUILD_ARCH list wins; otherwise use the
+# gfx names reported by rocminfo.
+set(ARCHS "") # use plural to indicate list
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+  string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+  list(TRANSFORM ARCHS STRIP)
+  list(REMOVE_DUPLICATES ARCHS)
+  message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+  message(STATUS "Detecting available architecture")
+  ############ Find using rocminfo #####################
+  find_program(ROCMINFO_EXECUTABLE rocminfo)
+  if(ROCMINFO_EXECUTABLE)
+    execute_process(
+      COMMAND ${ROCMINFO_EXECUTABLE}
+      OUTPUT_VARIABLE ROCMINFO_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # 1) Only match lines where the token follows "Name:"
+    string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+    # 2) Strip the leading "Name: " to keep just gfx tokens
+    string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+    # 3) Remove duplicates
+    list(REMOVE_DUPLICATES ARCHS)
+  endif()
 endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
-  set(TARGETS "${TARGETS};gfx942")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
-  set(TARGETS "${TARGETS};gfx950")
+
+if(ARCHS STREQUAL "")
+  message(FATAL_ERROR "No GPU architectures detected via rocminfo and no HPL_BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX")
 endif()
 
-foreach(target ${TARGETS})
+message(STATUS "Building for GPU architecture: ${ARCHS}")
+
+# Generate HIP_HIPCC_FLAGS
+foreach(target ${ARCHS})
   list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
 endforeach()
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 164d06d..78cc857 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -109,7 +109,9 @@ if(NOT ROCM_FOUND)
   execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
     WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
 
-  find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+  # Put the module directory of the unpacked rocm-cmake tree at the front of
+  # CMAKE_MODULE_PATH so that include() can find the modules it ships.
+  set(CMAKE_MODULE_PATH "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake;${CMAKE_MODULE_PATH}")
 endif()
 
 include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index de72a20..6542e7f 100755
--- a/install.sh
+++ b/install.sh
@@ -18,6 +18,7 @@ function display_help()
   echo " [--with-rocblas=] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
   echo " [--with-rocsolver=] Path to rocSOLVER library (Default: /opt/rocm/rocsolver)"
   echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)"
+  echo " [--arch] Specify comma separated architecture list to build (Default: detect from rocminfo)"
   echo " [--verbose-print] Verbose output during HPL setup (Default: true)"
   echo " [--enable-tracing] Annotate profiler traces with rocTX markers (Default: false)"
   echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,7 +34,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -68,11 +69,11 @@ exit_with_error( )
         printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
         ;;
 
-      centos|rhel)
+      centos|rhel|kylin)
         printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
         ;;
 
-      fedora)
+      fedora|rocky)
         printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
         ;;
 
@@ -145,6 +146,12 @@ install_openmpi( )
     exit 3
   fi
 
+  # Put tpl/ucx on the loader/compiler search paths (an unset variable must not add an empty entry)
+  UCX_ROOT="$(pwd)/tpl/ucx"
+  export LD_LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+  export LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
+  export CPATH="${UCX_ROOT}/include${CPATH:+:${CPATH}}"
+
   if [ ! -d "./tpl/openmpi" ]; then
     mkdir -p tpl && cd tpl
     git clone --branch v5.0.7 --recursive https://github.com/open-mpi/ompi.git openmpi
@@ -225,7 +232,7 @@ detailed_timing=true
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ $? -eq 4 ]]; then
-  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
+  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,arch:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
 else
   echo "Need a new version of getopt"
   exit_with_error 1
@@ -262,6 +269,9 @@ while true; do
     --with-rocsolver)
         with_rocsolver=${2}
         shift 2 ;;
+    --arch)
+        arch=${2}
+        shift 2 ;;
     --verbose-print)
         verbose_print=${2}
         shift 2 ;;
@@ -335,6 +345,10 @@ pushd .
   if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
     cmake_common_options="${cmake_common_options} -DHPLMXP_TRACING=ON"
   fi
+  # Forward the --arch list to CMake; whitespace is dropped so the value stays a single word
+  if [[ -n "${arch}" ]]; then
+    cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch//[[:space:]]/}"
+  fi
   shopt -u nocasematch
 
   # Build library with AMD toolchain because of existence of device kernels
diff --git a/scripts/mpirun_rochplmxp.in b/scripts/mpirun_rochplmxp.in
index 5ad6166..89bb1ab 100755
--- a/scripts/mpirun_rochplmxp.in
+++ b/scripts/mpirun_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -98,6 +98,18 @@ filename=HPL-MxP.dat
 inputfile=false
 cmdrun=false
 
+# Library locations of the OpenMPI and UCX trees under ../tpl (relative to this script)
+tpl_dir="$(dirname "$(readlink -f "$0")")/../tpl"
+ompi_prefix="${tpl_dir}/openmpi"
+ompi_lib_dir="${ompi_prefix}/lib"
+ucx_lib_dir="${tpl_dir}/ucx/lib"
+
+export LD_LIBRARY_PATH="${ompi_lib_dir}:${ucx_lib_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+# Point Open MPI at the tpl install only when that directory actually exists
+if [[ -d "${ompi_prefix}" ]]; then
+  export OPAL_PREFIX="${ompi_prefix}"
+fi
+
 # #################################################
 # MPI Args
 # #################################################
@@ -113,7 +125,15 @@ if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
   ompi_info=$(dirname ${mpi_bin})/ompi_info
   if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
     # ucx-specific args
-    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
+    # (UCX_TLS / UCX_MEMTYPE_CACHE already set in the environment override these defaults)
+    mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct \
+              -x UCX_TLS=${UCX_TLS:-self,sm,rocm_ipc,rocm_copy,rc_mlx5} \
+              -x UCX_MEMTYPE_CACHE=${UCX_MEMTYPE_CACHE:-n} \
+              ${mpi_args}"
   fi
+  # --allow-run-as-root is an Open MPI option; pass it only when started as uid 0
+  if [[ $(id -u) -eq 0 ]]; then
+    mpi_args="--allow-run-as-root ${mpi_args}"
+  fi
 fi
 
diff --git a/scripts/run_rochplmxp.in b/scripts/run_rochplmxp.in
index 698d3c1..bf1a15a 100755
--- a/scripts/run_rochplmxp.in
+++ b/scripts/run_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
   fi
 
   case "${ID}" in
-    ubuntu|centos|rhel|fedora|sles)
+    ubuntu|centos|rhel|fedora|sles|kylin|rocky)
         true
         ;;
     *)  printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
@@ -98,7 +98,9 @@ filename=HPL-MxP.dat
 inputfile=false
 cmdrun=false
 
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir=$(dirname "$(readlink -f "$0")")/../tpl
+ucx_lib_dir=$tpl_dir/ucx/lib
+export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:$ucx_lib_dir:${rocm_dir}/lib:$LD_LIBRARY_PATH
 
 # #################################################
 # Parameter parsing
diff --git a/src/hplmxp_ptest.cpp b/src/hplmxp_ptest.cpp
index 11d0f44..e8b1eee 100644
--- a/src/hplmxp_ptest.cpp
+++ b/src/hplmxp_ptest.cpp
@@ -211,7 +211,7 @@ void HPLMXP_ptest(HPLMXP_T_test& test,
              ctime(&current_time_end));
     }
 #ifdef HPLMXP_PROGRESS_REPORT
-    printf("Final Score: %7.4e GFLOPS \n", Gflops);
+    printf("Final Score: %7.9e GFLOPS \n", Gflops);
 #endif
   }
 #ifdef HPLMXP_DETAILED_TIMING