"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "82fae390fb3f8ebaaff5dfeae439b4a1a703d363"
Unverified Commit 7d8ea5f0 authored by Illia Silin's avatar Illia Silin Committed by GitHub
Browse files

Fix build logic using GPU_ARCHS. (#1536)

* update build logic with GPU_ARCHS

* fix the GPU_ARCHS build for codegen

* unset GPU_TARGETS when GPU_ARCHS are set
parent cc8f466a
...@@ -98,11 +98,6 @@ if(DL_KERNELS) ...@@ -98,11 +98,6 @@ if(DL_KERNELS)
set(CK_ENABLE_DL_KERNELS "ON") set(CK_ENABLE_DL_KERNELS "ON")
endif() endif()
if(INSTANCES_ONLY)
add_definitions(-DINSTANCES_ONLY)
set(CK_ENABLE_INSTANCES_ONLY "ON")
endif()
include(getopt) include(getopt)
# CK version file to record release version as well as git commit hash # CK version file to record release version as well as git commit hash
...@@ -127,6 +122,12 @@ rocm_setup_version(VERSION ${version}) ...@@ -127,6 +122,12 @@ rocm_setup_version(VERSION ${version})
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip "$ENV{ROCM_PATH}" "$ENV{HIP_PATH}") list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip "$ENV{ROCM_PATH}" "$ENV{HIP_PATH}")
message("GPU_TARGETS= ${GPU_TARGETS}") message("GPU_TARGETS= ${GPU_TARGETS}")
message("GPU_ARCHS= ${GPU_ARCHS}")
if(GPU_ARCHS)
#disable GPU_TARGETS to avoid conflicts, this needs to happen before we call hip package
unset(GPU_TARGETS CACHE)
unset(AMDGPU_TARGETS CACHE)
endif()
find_package(hip) find_package(hip)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility # No assumption that HIP kernels are launched with uniform block size for backward compatibility
...@@ -135,55 +136,38 @@ math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) ...@@ -135,55 +136,38 @@ math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR})
message("hip_version_flat=${hip_VERSION_FLAT}") message("hip_version_flat=${hip_VERSION_FLAT}")
message("checking which targets are supported") message("checking which targets are supported")
#This is the list of targets to be used in case GPU_TARGETS is not set on command line #In order to build just the CK library (without tests and examples) for all supported GPU targets
#These targets will be filtered and only supported ones will be used #use -D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
#Setting GPU_TARGETS on command line will override this list #the GPU_TARGETS flag will be reset in this case in order to avoid conflicts.
if(NOT PROFILER_ONLY) #
if(NOT ENABLE_ASAN_PACKAGING) #In order to build CK along with all tests and examples it should be OK to set GPU_TARGETS to just 1 or 2 similar architectures.
#build CK for all supported targets if(NOT ENABLE_ASAN_PACKAGING)
if(NOT WIN32 AND ${hip_VERSION_FLAT} LESS 600300000) if(NOT WIN32 AND ${hip_VERSION_FLAT} LESS 600300000)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above # WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
rocm_check_target_ids(DEFAULT_GPU_TARGETS set(CK_GPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
else()
rocm_check_target_ids(DEFAULT_GPU_TARGETS
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
endif()
else() else()
#build CK only for xnack-supported targets set(CK_GPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
rocm_check_target_ids(DEFAULT_GPU_TARGETS
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE)
endif() endif()
else() else()
add_definitions(-DPROFILER_ONLY) #build CK only for xnack-supported targets when using ASAN
set(GPU_TARGETS "" CACHE STRING "" FORCE) set(CK_GPU_TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
endif()
#if user set GPU_ARCHS on the cmake command line, overwrite default target list with user's list
#otherwise, if user set GPU_TARGETS, use that set of targets
if(GPU_ARCHS)
set(CK_GPU_TARGETS ${GPU_ARCHS})
else()
if(GPU_TARGETS) if(GPU_TARGETS)
message(FATAL_ERROR "For PROFILE_ONLY build, please do not set GPU_TARGETS, use GPU_ARCH = gfx90, gfx94, gfx10, gfx11 or gfx12") set(CK_GPU_TARGETS ${GPU_TARGETS})
endif()
if(GPU_ARCH MATCHES "gfx90")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx908;gfx90a")
elseif(GPU_ARCH MATCHES "gfx94")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx940;gfx941;gfx942")
elseif(GPU_ARCH MATCHES "gfx10")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1030")
elseif(GPU_ARCH MATCHES "gfx11")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1100;gfx1101;gfx1102")
elseif(GPU_ARCH MATCHES "gfx12")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1200;gfx1201")
else()
message(FATAL_ERROR "For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12")
endif() endif()
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE)
endif() endif()
message("Supported GPU_TARGETS= ${DEFAULT_GPU_TARGETS}") #make sure all the targets on the list are actually supported by the current compiler
rocm_check_target_ids(SUPPORTED_GPU_TARGETS
TARGETS ${CK_GPU_TARGETS})
if(GPU_TARGETS) message("Building CK for the following targets: ${SUPPORTED_GPU_TARGETS}")
message("Building CK for the following targets: ${GPU_TARGETS}")
else()
message("Building CK for the default targets: ${DEFAULT_GPU_TARGETS}")
endif()
if (GPU_TARGETS) if (GPU_TARGETS)
if (GPU_TARGETS MATCHES "gfx9") if (GPU_TARGETS MATCHES "gfx9")
...@@ -557,8 +541,7 @@ ENDFOREACH() ...@@ -557,8 +541,7 @@ ENDFOREACH()
add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES}) add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES})
add_subdirectory(library) add_subdirectory(library)
if(NOT DEFINED INSTANCES_ONLY) if(NOT GPU_ARCHS)
if(NOT DEFINED PROFILER_ONLY)
rocm_package_setup_component(tests rocm_package_setup_component(tests
LIBRARY_NAME composablekernel LIBRARY_NAME composablekernel
PACKAGE_NAME tests # Prevent -static suffix on package name PACKAGE_NAME tests # Prevent -static suffix on package name
...@@ -572,23 +555,15 @@ if(NOT DEFINED INSTANCES_ONLY) ...@@ -572,23 +555,15 @@ if(NOT DEFINED INSTANCES_ONLY)
if(BUILD_TESTING) if(BUILD_TESTING)
add_subdirectory(test) add_subdirectory(test)
endif() endif()
rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory(profiler)
else()
#When building PROFILER_ONLY, label the package with GPU_ARCH
rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler_${GPU_ARCH}
)
add_subdirectory(profiler)
endif()
endif() endif()
if(NOT DEFINED PROFILER_ONLY AND (GPU_TARGETS MATCHES "gfx9" OR DEFINED INSTANCES_ONLY)) rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory(profiler)
if(GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS)
add_subdirectory(codegen) add_subdirectory(codegen)
endif() endif()
......
...@@ -1138,8 +1138,8 @@ pipeline { ...@@ -1138,8 +1138,8 @@ pipeline {
execute_args = """ cmake -D CMAKE_PREFIX_PATH=/opt/rocm \ execute_args = """ cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER="${build_compiler()}" \ -D CMAKE_CXX_COMPILER="${build_compiler()}" \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D INSTANCES_ONLY=ON \ -D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """ -D CMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
} }
steps{ steps{
buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", no_reboot:true, build_type: 'Release', execute_cmd: execute_args) buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
......
...@@ -90,7 +90,12 @@ Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composa ...@@ -90,7 +90,12 @@ Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composa
``` ```
If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets
supported by the current compiler (this may take a long time). supported by the current compiler (this may take a long time).
NOTE: If you set `GPU_TARGETS` to a list of architectures, the build will only work if the
architectures are similar, e.g., `gfx908;gfx90a`, or `gfx1100;gfx1101;gfx1102`. Otherwise, if you
want to build the library for a list of different architectures,
you should use the `GPU_ARCHS` build argument, for example `-D GPU_ARCHS="gfx908;gfx1030;gfx1100;gfx942"`.
4. Build the entire CK library: 4. Build the entire CK library:
...@@ -137,10 +142,6 @@ crash. In such cases, you can reduce the number of threads to 32 by using `-j32` ...@@ -137,10 +142,6 @@ crash. In such cases, you can reduce the number of threads to 32 by using `-j32`
Additional cmake flags can be used to significantly speed-up the build: Additional cmake flags can be used to significantly speed-up the build:
* `INSTANCES_ONLY` (default is OFF) must be set to ON in order to build only the instances and library
while skipping all tests, examples, and profiler. This is useful in cases when you plan to use CK as a
dependency and don't plan to run any examples or tests.
* `DTYPES` (default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build * `DTYPES` (default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build
instances of select data types only. The main default data types are fp32 and fp16; you can safely skip instances of select data types only. The main default data types are fp32 and fp16; you can safely skip
other data types. other data types.
......
list(APPEND CMAKE_PREFIX_PATH /opt/rocm) list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
add_subdirectory(rtc) add_subdirectory(rtc)
file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp) file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp)
if(NOT INSTANCES_ONLY) # do not build the tests when we build the library for various targets
if(NOT GPU_ARCHS)
foreach(TEST_SRC ${TEST_SRCS}) foreach(TEST_SRC ${TEST_SRCS})
set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP) set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP)
get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE) get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE)
......
...@@ -45,11 +45,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) ...@@ -45,11 +45,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
endforeach() endforeach()
endif() endif()
if(INSTANCES_ONLY) set(EX_TARGETS ${SUPPORTED_GPU_TARGETS})
set(EX_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(EX_TARGETS ${GPU_TARGETS})
endif()
#Do not build any DL examples if DL_KERNELS not set #Do not build any DL examples if DL_KERNELS not set
foreach(source IN LISTS FILE_NAME) foreach(source IN LISTS FILE_NAME)
...@@ -147,11 +143,8 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) ...@@ -147,11 +143,8 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
endforeach() endforeach()
endif() endif()
if(INSTANCES_ONLY) set(EX_TARGETS ${SUPPORTED_GPU_TARGETS})
set(EX_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(EX_TARGETS ${GPU_TARGETS})
endif()
#Do not build any DL examples if DL_KERNELS not set #Do not build any DL examples if DL_KERNELS not set
foreach(source IN LISTS FILE_NAME) foreach(source IN LISTS FILE_NAME)
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
......
...@@ -97,13 +97,6 @@ ...@@ -97,13 +97,6 @@
#cmakedefine CK_ENABLE_DL_KERNELS @CK_ENABLE_DL_KERNELS@ #cmakedefine CK_ENABLE_DL_KERNELS @CK_ENABLE_DL_KERNELS@
#endif #endif
//
// Instances supports in the current CK build
//
#ifndef CK_ENABLE_INSTANCES_ONLY
#cmakedefine CK_ENABLE_INSTANCES_ONLY @CK_ENABLE_INSTANCES_ONLY@
#endif
// //
// CK kernels which support XDL (MI series) // CK kernels which support XDL (MI series)
// //
......
...@@ -37,11 +37,7 @@ function(add_instance_library INSTANCE_NAME) ...@@ -37,11 +37,7 @@ function(add_instance_library INSTANCE_NAME)
endforeach() endforeach()
endif() endif()
if(INSTANCES_ONLY) set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(INST_TARGETS ${GPU_TARGETS})
endif()
# Do not build DL instances if DL_KERNELS macro is not set # Do not build DL instances if DL_KERNELS macro is not set
foreach(source IN LISTS ARGN) foreach(source IN LISTS ARGN)
...@@ -75,11 +71,7 @@ function(add_instance_library INSTANCE_NAME) ...@@ -75,11 +71,7 @@ function(add_instance_library INSTANCE_NAME)
if(ARGN) if(ARGN)
set(INST_OBJ) set(INST_OBJ)
foreach(source IN LISTS ARGN) foreach(source IN LISTS ARGN)
if(INSTANCES_ONLY) set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(INST_TARGETS ${GPU_TARGETS})
endif()
if(source MATCHES "_xdl") if(source MATCHES "_xdl")
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201) list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
elseif(ARGN MATCHES "_wmma") elseif(ARGN MATCHES "_wmma")
...@@ -191,12 +183,7 @@ FOREACH(subdir_path ${dir_list}) ...@@ -191,12 +183,7 @@ FOREACH(subdir_path ${dir_list})
set(add_inst 1) set(add_inst 1)
endif() endif()
if(INSTANCES_ONLY) set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(INST_TARGETS ${GPU_TARGETS})
endif()
if(("${cmake_instance}" MATCHES "quantization") AND (DEFINED DTYPES) AND (NOT DTYPES MATCHES "int8")) if(("${cmake_instance}" MATCHES "quantization") AND (DEFINED DTYPES) AND (NOT DTYPES MATCHES "int8"))
message("quantization instances will not be built!") message("quantization instances will not be built!")
......
...@@ -24,7 +24,7 @@ set(PROFILER_SOURCES ...@@ -24,7 +24,7 @@ set(PROFILER_SOURCES
profile_permute_scale.cpp profile_permute_scale.cpp
) )
if(GPU_TARGETS MATCHES "gfx9") if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES) if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
list(APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp) list(APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp)
list(APPEND PROFILER_SOURCES profile_contraction_scale.cpp) list(APPEND PROFILER_SOURCES profile_contraction_scale.cpp)
...@@ -49,7 +49,7 @@ if(GPU_TARGETS MATCHES "gfx9") ...@@ -49,7 +49,7 @@ if(GPU_TARGETS MATCHES "gfx9")
list(APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp) list(APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp)
endif() endif()
list(APPEND PROFILER_SOURCES profile_gemm_multiply_add.cpp) list(APPEND PROFILER_SOURCES profile_gemm_multiply_add.cpp)
if(GPU_TARGETS MATCHES "gfx94") if(SUPPORTED_GPU_TARGETS MATCHES "gfx94")
list(APPEND PROFILER_SOURCES profile_gemm_multiply_multiply.cpp) list(APPEND PROFILER_SOURCES profile_gemm_multiply_multiply.cpp)
list(APPEND PROFILER_SOURCES profile_gemm_ab_scale.cpp) list(APPEND PROFILER_SOURCES profile_gemm_ab_scale.cpp)
endif() endif()
...@@ -69,7 +69,7 @@ if(GPU_TARGETS MATCHES "gfx9") ...@@ -69,7 +69,7 @@ if(GPU_TARGETS MATCHES "gfx9")
endif() endif()
if(GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12" OR GPU_TARGETS MATCHES "gfx9") if(SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12" OR SUPPORTED_GPU_TARGETS MATCHES "gfx9")
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES) if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
list(APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp) list(APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp)
endif() endif()
...@@ -111,7 +111,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_inst ...@@ -111,7 +111,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_inst
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_transpose_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_transpose_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_permute_scale_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_permute_scale_instance)
if(GPU_TARGETS MATCHES "gfx9") if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES) if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_bilinear_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_bilinear_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_scale_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_scale_instance)
...@@ -135,7 +135,7 @@ if(GPU_TARGETS MATCHES "gfx9") ...@@ -135,7 +135,7 @@ if(GPU_TARGETS MATCHES "gfx9")
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance)
if(GPU_TARGETS MATCHES "gfx94") if(SUPPORTED_GPU_TARGETS MATCHES "gfx94")
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_multiply_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_multiply_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_ab_scale_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_ab_scale_instance)
endif() endif()
...@@ -159,7 +159,7 @@ if(GPU_TARGETS MATCHES "gfx9") ...@@ -159,7 +159,7 @@ if(GPU_TARGETS MATCHES "gfx9")
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_convinvscale_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_convinvscale_instance)
endif() endif()
if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12") if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12")
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES) if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance)
endif() endif()
......
...@@ -41,11 +41,7 @@ function(add_test_executable TEST_NAME) ...@@ -41,11 +41,7 @@ function(add_test_executable TEST_NAME)
endforeach() endforeach()
endif() endif()
if(INSTANCES_ONLY) set(TEST_TARGETS ${SUPPORTED_GPU_TARGETS})
set(TEST_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(TEST_TARGETS ${GPU_TARGETS})
endif()
foreach(source IN LISTS ARGN) foreach(source IN LISTS ARGN)
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
...@@ -122,11 +118,7 @@ function(add_gtest_executable TEST_NAME) ...@@ -122,11 +118,7 @@ function(add_gtest_executable TEST_NAME)
endforeach() endforeach()
endif() endif()
if(INSTANCES_ONLY) set(TEST_TARGETS ${SUPPORTED_GPU_TARGETS})
set(TEST_TARGETS ${DEFAULT_GPU_TARGETS})
else()
set(TEST_TARGETS ${GPU_TARGETS})
endif()
foreach(source IN LISTS ARGN) foreach(source IN LISTS ARGN)
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
...@@ -211,10 +203,10 @@ add_subdirectory(conv_tensor_rearrange) ...@@ -211,10 +203,10 @@ add_subdirectory(conv_tensor_rearrange)
add_subdirectory(transpose) add_subdirectory(transpose)
add_subdirectory(permute_scale) add_subdirectory(permute_scale)
add_subdirectory(wrapper) add_subdirectory(wrapper)
if(GPU_TARGETS MATCHES "gfx11") if(SUPPORTED_GPU_TARGETS MATCHES "gfx11")
add_subdirectory(wmma_op) add_subdirectory(wmma_op)
endif() endif()
if(GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2) # smfmac needs ROCm6.2 if(SUPPORTED_GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2) # smfmac needs ROCm6.2
add_subdirectory(smfmac_op) add_subdirectory(smfmac_op)
endif() endif()
add_subdirectory(position_embedding) add_subdirectory(position_embedding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment