Commit 0f367454 authored by akolliasAMD's avatar akolliasAMD
Browse files

cherry picked from commit 02ce785c

parent f6cc9923
...@@ -6,7 +6,53 @@ else() ...@@ -6,7 +6,53 @@ else()
endif() endif()
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5)
project(TransferBench VERSION 1.51.0 LANGUAGES CXX) project(TransferBench VERSION 1.52.0 LANGUAGES CXX)
#==================================================================================================
# GPU architectures that are built when the user does not request specific ones.
# The entries are grouped by architecture-name prefix purely for readability; the resulting
# list content and order are what later steps consume.
#==================================================================================================
set(DEFAULT_GPUS
  # gfx9xx
  gfx906 gfx908 gfx90a gfx942
  # gfx10xx
  gfx1030
  # gfx11xx
  gfx1100 gfx1101 gfx1102
  # gfx12xx
  gfx1200 gfx1201
)
# Build only for local GPU architecture
# When BUILD_LOCAL_GPU_TARGET_ONLY is enabled, DEFAULT_GPUS is narrowed to the GPUs reported by
# rocm_local_targets() (a helper expected to come from the ROCm CMake modules, if loaded).
# Detection is done into a scratch variable first: rocm_local_targets() writes its result
# straight into the variable it is given, and (NOTE(review): per rocm-cmake, confirm) it reports
# NOTFOUND when no GPU can be enumerated.  Passing DEFAULT_GPUS directly would then destroy the
# default list instead of falling back to it.
if (BUILD_LOCAL_GPU_TARGET_ONLY)
  message(STATUS "Building only for local GPU target")
  set(detected_local_gpus "")
  if (COMMAND rocm_local_targets)
    rocm_local_targets(detected_local_gpus)
  endif()
  # if(<variable>) is false for empty, NOTFOUND and *-NOTFOUND values, so only a usable
  # detection result replaces the defaults.
  if (detected_local_gpus)
    set(DEFAULT_GPUS "${detected_local_gpus}")
  else()
    message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.")
  endif()
  unset(detected_local_gpus)
endif()
# Determine which GPU architectures to build for
# AMDGPU_TARGETS is a cache entry: a value already supplied by the user (for example via
# -DAMDGPU_TARGETS=...) is kept, and DEFAULT_GPUS only seeds it when it is not set yet.
set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.")
# Check if clang compiler can offload to AMDGPU_TARGETS
# The result is stored in SUPPORTED_GPUS.
if (COMMAND rocm_check_target_ids)
  message(STATUS "Checking for ROCm support for GPU targets: " "${AMDGPU_TARGETS}")
  rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${AMDGPU_TARGETS})
else()
  # Without the checker the requested targets cannot be validated.  Use them as-is rather than
  # DEFAULT_GPUS, so that an explicit AMDGPU_TARGETS selection is not silently discarded
  # (when nothing was requested, AMDGPU_TARGETS already equals DEFAULT_GPUS).
  message(WARNING "Unable to check for supported GPU targets. Falling back to the requested GPU targets without validation.")
  set(SUPPORTED_GPUS ${AMDGPU_TARGETS})
endif()
# Final list of architectures to compile for.  GPU_TARGETS is a cache entry, so a value that is
# already present in the cache takes precedence over SUPPORTED_GPUS.
set(GPU_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
message(STATUS "Compiling for ${GPU_TARGETS}")

# Turn every target into an --offload-arch=<target> compiler option.
foreach(target ${GPU_TARGETS})
  list(APPEND static_link_flags --offload-arch=${target})
endforeach()

# Flatten the option list into a space-separated string.
# string(REPLACE) is used instead of list(JOIN): list(JOIN) only exists in CMake >= 3.12, while
# this project declares cmake_minimum_required(VERSION 3.5).
string(REPLACE ";" " " flags_str "${static_link_flags}")

# Prepend the offload options so that any flags already present in CMAKE_CXX_FLAGS follow them.
set( CMAKE_CXX_FLAGS "${flags_str} ${CMAKE_CXX_FLAGS}")
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib") set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib")
include_directories(${ROCM_PATH}/include) include_directories(${ROCM_PATH}/include)
link_libraries(numa hsa-runtime64 pthread) link_libraries(numa hsa-runtime64 pthread)
......
...@@ -62,7 +62,7 @@ struct SubExecParam ...@@ -62,7 +62,7 @@ struct SubExecParam
}; };
// Macro for collecting HW_REG_HW_ID // Macro for collecting HW_REG_HW_ID
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
#define GetHwId(hwId) \ #define GetHwId(hwId) \
hwId = 0 hwId = 0
#elif defined(__NVCC__) #elif defined(__NVCC__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment