diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91afcc4..b1c3ef6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(i ${rochplmxp_device_source})
endforeach()
# HIP flags workaround while target_compile_options does not work
-list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE")
+list(APPEND HIP_HIPCC_FLAGS "-Wno-unused-command-line-argument -fPIE --gpu-max-threads-per-block=1024")
list(APPEND CMAKE_HOST_FLAGS "")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -99,21 +99,50 @@ else()
list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native;-Wno-deprecated-declarations")
endif()
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
- set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
- set(TARGETS "${TARGETS};gfx90a")
+set(ARCHS "") # list of GPU architectures requested by the user or detected below
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+ string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+ list(TRANSFORM ARCHS STRIP)
+ list(REMOVE_DUPLICATES ARCHS)
+ message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+ message(STATUS "Detecting available architecture")
+ ############ Find using rocminfo #####################
+ find_program(ROCMINFO_EXECUTABLE rocminfo)
+ if(ROCMINFO_EXECUTABLE)
+ execute_process(
+ COMMAND ${ROCMINFO_EXECUTABLE}
+ OUTPUT_VARIABLE ROCMINFO_OUTPUT
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+
+ # 1) Only match lines where the token follows "Name:"
+ string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+ # 2) Strip the leading "Name: " to keep just gfx tokens
+ string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+ # 3) Remove duplicates; ARCHS now holds the bare gfx tokens
+ list(REMOVE_DUPLICATES ARCHS)
+ endif()
endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
- set(TARGETS "${TARGETS};gfx942")
+if (HIP_VERSION VERSION_GREATER_EQUAL "7.0")
+ list(APPEND TARGETS "gfx1201") # always built on HIP >= 7.0, in addition to ARCHS
endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
- set(TARGETS "${TARGETS};gfx950")
+
+if(ARCHS STREQUAL "")
+ message(FATAL_ERROR "No GPU architectures detected via rocminfo and no HPL_BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX")
endif()
+
+# Generate HIP_HIPCC_FLAGS: emit one --offload-arch flag per selected target.
+# ARCHS (from HPL_BUILD_ARCH or rocminfo) has to be merged into TARGETS first,
+# because TARGETS is the list the loop below iterates over.
+list(APPEND TARGETS ${ARCHS})
+list(REMOVE_DUPLICATES TARGETS)
+message(STATUS "Building for GPU architecture: ${TARGETS}")
+
foreach(target ${TARGETS})
list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
endforeach()
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 164d06d..78cc857 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -109,7 +109,8 @@ if(NOT ROCM_FOUND)
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip
WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
- find_package(ROCmCMakeBuildTools REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag})
+ # Make the extracted rocm-cmake modules visible to include() instead of locating them via find_package(ROCmCMakeBuildTools).
+ list(INSERT CMAKE_MODULE_PATH 0 "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/share/rocm/cmake")
endif()
include(ROCMSetupVersion)
diff --git a/install.sh b/install.sh
index de72a20..6542e7f 100755
--- a/install.sh
+++ b/install.sh
@@ -18,6 +18,7 @@ function display_help()
echo " [--with-rocblas=
] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
echo " [--with-rocsolver=] Path to rocSOLVER library (Default: /opt/rocm/rocsolver)"
echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)"
+ echo " [--arch=<list>] Comma separated GPU architecture list to build, e.g. gfx90a,gfx942 (Default: detect with rocminfo)"
echo " [--verbose-print] Verbose output during HPL setup (Default: true)"
echo " [--enable-tracing] Annotate profiler traces with rocTX markers (Default: false)"
echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -33,7 +34,7 @@ supported_distro( )
fi
case "${ID}" in
- ubuntu|centos|rhel|fedora|sles)
+ ubuntu|centos|rhel|fedora|sles|kylin|rocky)
true
;;
- *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
+ *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora, SLES, Kylin and Rocky\n"
@@ -68,11 +69,11 @@ exit_with_error( )
printf "sudo apt install -y ${library_dependencies_ubuntu[*]}\n"
;;
- centos|rhel)
+ centos|rhel|kylin)
printf "sudo yum -y --nogpgcheck install ${library_dependencies_centos[*]}\n"
;;
- fedora)
+ fedora|rocky)
printf "sudo dnf install -y ${library_dependencies_fedora[*]}\n"
;;
@@ -145,6 +146,11 @@ install_openmpi( )
exit 3
fi
+ # Expose the UCX tree under tpl/ucx to the loader, linker and compiler; append the previous value only when it is non-empty so no empty (current-directory) path element is created.
+ UCX_ROOT="$(pwd)/tpl/ucx"
+ export LD_LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+ export LIBRARY_PATH="${UCX_ROOT}/lib:${UCX_ROOT}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
+ export CPATH="${UCX_ROOT}/include${CPATH:+:${CPATH}}"
if [ ! -d "./tpl/openmpi" ]; then
mkdir -p tpl && cd tpl
git clone --branch v5.0.7 --recursive https://github.com/open-mpi/ompi.git openmpi
@@ -225,7 +231,7 @@ detailed_timing=true
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
- GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
+ GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,with-rocsolver:,arch:,verbose-print:,enable-tracing:,progress-report:,detailed-timing: --options hg -- "$@")
else
echo "Need a new version of getopt"
exit_with_error 1
@@ -262,6 +268,9 @@ while true; do
--with-rocsolver)
with_rocsolver=${2}
shift 2 ;;
+ --arch)
+ arch=${2}
+ shift 2 ;;
--verbose-print)
verbose_print=${2}
shift 2 ;;
@@ -335,6 +344,9 @@ pushd .
if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
cmake_common_options="${cmake_common_options} -DHPLMXP_TRACING=ON"
fi
+ if [[ -n "${arch:-}" ]]; then
+ cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch//[[:space:]]/}" # blanks removed so a list like "gfx90a, gfx942" stays one option
+ fi
shopt -u nocasematch
# Build library with AMD toolchain because of existence of device kernels
diff --git a/scripts/mpirun_rochplmxp.in b/scripts/mpirun_rochplmxp.in
index 5ad6166..89bb1ab 100755
--- a/scripts/mpirun_rochplmxp.in
+++ b/scripts/mpirun_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
fi
case "${ID}" in
- ubuntu|centos|rhel|fedora|sles)
+ ubuntu|centos|rhel|fedora|sles|kylin|rocky)
true
;;
- *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
+ *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora, SLES, Kylin and Rocky\n"
@@ -98,6 +98,14 @@ filename=HPL-MxP.dat
inputfile=false
cmdrun=false
+tpl_dir="$(dirname "$(readlink -f "$0")")/../tpl"
+ompi_prefix="${tpl_dir}/openmpi"
+ompi_lib_dir="${ompi_prefix}/lib"
+ucx_lib_dir="${tpl_dir}/ucx/lib"
+# Prepend the Open MPI/UCX library dirs under tpl/; set OPAL_PREFIX only when tpl/openmpi exists so another MPI install is not pointed at a missing prefix.
+export LD_LIBRARY_PATH="${ompi_lib_dir}:${ucx_lib_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+if [[ -d "${ompi_prefix}" ]]; then export OPAL_PREFIX="${ompi_prefix}"; fi
+
# #################################################
# MPI Args
# #################################################
@@ -113,7 +121,14 @@ if [[ $(${mpi_bin} --version | grep "open-mpi") ]]; then
ompi_info=$(dirname ${mpi_bin})/ompi_info
if [[ $(${ompi_info} | grep "MCA pml: ucx") ]]; then
# ucx-specific args
- mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct ${mpi_args}"
+ mpi_args="--mca pml ucx --mca btl ^vader,tcp,openib,uct \
+ -x UCX_TLS=${UCX_TLS:-self,sm,rocm_ipc,rocm_copy,rc_mlx5} \
+ -x UCX_MEMTYPE_CACHE=${UCX_MEMTYPE_CACHE:-n} \
+ ${mpi_args}"
fi
+ # --allow-run-as-root is an Open MPI option, so it is added only inside this Open MPI branch and only when running as root.
+ if [[ ${EUID} -eq 0 ]]; then
+ mpi_args="--allow-run-as-root ${mpi_args}"
+ fi
fi
diff --git a/scripts/run_rochplmxp.in b/scripts/run_rochplmxp.in
index 698d3c1..bf1a15a 100755
--- a/scripts/run_rochplmxp.in
+++ b/scripts/run_rochplmxp.in
@@ -44,7 +44,7 @@ supported_distro( )
fi
case "${ID}" in
- ubuntu|centos|rhel|fedora|sles)
+ ubuntu|centos|rhel|fedora|sles|kylin|rocky)
true
;;
- *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora and SLES\n"
+ *) printf "This script is currently supported on Ubuntu, CentOS, RHEL, Fedora, SLES, Kylin and Rocky\n"
@@ -98,7 +98,9 @@ filename=HPL-MxP.dat
inputfile=false
cmdrun=false
-export LD_LIBRARY_PATH=${rocblas_dir}:${blas_dir}:${rocm_dir}/lib:$LD_LIBRARY_PATH
+tpl_dir="$(dirname "$(readlink -f "$0")")/../tpl"
+ucx_lib_dir="${tpl_dir}/ucx/lib" # UCX libraries under tpl/ next to the script directory
+export LD_LIBRARY_PATH="${rocblas_dir}:${blas_dir}:${ucx_lib_dir}:${rocm_dir}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# #################################################
# Parameter parsing
diff --git a/src/hplmxp_ptest.cpp b/src/hplmxp_ptest.cpp
index 11d0f44..e8b1eee 100644
--- a/src/hplmxp_ptest.cpp
+++ b/src/hplmxp_ptest.cpp
@@ -211,7 +211,7 @@ void HPLMXP_ptest(HPLMXP_T_test& test,
ctime(&current_time_end));
}
#ifdef HPLMXP_PROGRESS_REPORT
- printf("Final Score: %7.4e GFLOPS \n", Gflops);
+ printf("Final Score: %7.9e GFLOPS \n", Gflops);
#endif
}
#ifdef HPLMXP_DETAILED_TIMING