Commit 5ab94474 authored by Chao Liu's avatar Chao Liu
Browse files

update cmake

parent bca7b750
......@@ -71,11 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
### half
##find_path(HALF_INCLUDE_DIR half.hpp)
#set(HALF_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external/half/include")
#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
rocm_create_package(
NAME CK-${CK_BACKEND}
......@@ -229,21 +224,18 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
#file(GLOB_RECURSE COMPOSABLE_KERNEL_HEADERS "composable_kernel/include/*/*.hpp")
#file(GLOB_RECURSE DEVICE_OPS_HEADERS "device_operation/include/*.hpp")
#
#file(GLOB_RECURSE DEVICE_OPS_SOURCE "device_operation/*.cpp")
#
#set(CK_HEADERS ${COMPOSABLE_KERNEL_HEADERS} ${DEVICE_OPS_HEADERS})
#set(CK_SOURCE ${DEVICE_OPS_SOURCE})
#add_library(composable_kernel ${CK_SOURCE})
configure_file("${PROJECT_SOURCE_DIR}/include/ck/hip_version.hpp.in" "${PROJECT_BINARY_DIR}/include/ck/hip_version.hpp")
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/include
${PROJECT_BINARY_DIR}/include
${PROJECT_SOURCE_DIR}/library/include
)
SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV")
if(BUILD_DEV)
target_compile_options(composable_kernel PRIVATE -Werror)
target_compile_options(composable_kernel PRIVATE -Weverything)
add_compile_options(-Werror)
add_compile_options(-Weverything)
endif()
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
......
#file(GLOB_RECURSE COMPOSABLE_KERNEL_HEADERS "composable_kernel/include/*/*.hpp")
#file(GLOB_RECURSE DEVICE_OPS_HEADERS "device_operation/include/*.hpp")
#
#file(GLOB_RECURSE DEVICE_OPS_SOURCE "device_operation/*.cpp")
#
#set(CK_HEADERS ${COMPOSABLE_KERNEL_HEADERS} ${DEVICE_OPS_HEADERS})
#set(CK_SOURCE ${DEVICE_OPS_SOURCE})
#add_library(composable_kernel ${CK_SOURCE})
add_subdirectory(src/host_tensor)
add_subdirectory(src/tensor_operation_instance/gpu)
## composable_kernel
file(GLOB_RECURSE CK_LIBRARY_SOURCE "src/*/*.cpp")
set(CK_SOURCE ${CK_LIBRARY_SOURCE})
add_library(composable_kernel ${CK_SOURCE})
target_include_directories(composable_kernel PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
)
target_include_directories(composable_kernel PUBLIC
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
)
target_include_directories(composable_kernel PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/library/include>
)
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
include
)
set(HOST_TENSOR_SOURCE
src/host_tensor.cpp;
src/device.cpp;
)
clang_tidy_check(composable_kernel)
......@@ -5,10 +5,12 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
)
file(GLOB_RECURSE HOST_TENSOR_SOURCE "./*.cpp")
set(HOST_TENSOR_SOURCE
device.cpp
host_tensor.cpp
)
add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE})
target_link_libraries(host_tensor PRIVATE hip::device)
target_link_libraries(host_tensor INTERFACE hip::host)
target_compile_features(host_tensor PUBLIC)
set_target_properties(host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS host_tensor LIBRARY DESTINATION lib)
......
......@@ -16,180 +16,15 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/external/include/half
)
# device_gemm_instance
set(DEVICE_GEMM_INSTANCE_SOURCE
gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_int8_int8_int8_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp;
)
# device_gemm_bias2d_instance
set(DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_nk_mn_instance.cpp;
)
# device_gemm_bias_relu_instance
set(DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_nk_mn_instance.cpp;
)
# device_gemm_bias_relu_add_instance
set(DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_nk_mn_instance.cpp;
)
set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp;
)
# device_conv2d_fwd_instance
set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv1d_fwd_instance
set(DEVICE_CONV1D_FWD_INSTANCE_SOURCE
conv1d_fwd/device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instance.cpp;
)
# device_conv2d_fwd_bias_relu_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_fwd_bias_relu_add_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_fwd_bias_relu_atomic_add_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
conv2d_fwd_bias_relu_atomic_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_bwd_data_instance
set(DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
)
# device_reduce_instance
set(DEVICE_REDUCE_INSTANCE_SOURCE
reduce/device_reduce_instance_blockwise_f16_f16_f16.cpp;
reduce/device_reduce_instance_blockwise_f16_f32_f16.cpp;
reduce/device_reduce_instance_blockwise_f32_f32_f32.cpp;
reduce/device_reduce_instance_blockwise_f32_f64_f32.cpp;
reduce/device_reduce_instance_blockwise_f64_f64_f64.cpp;
reduce/device_reduce_instance_threadwise_f16_f16_f16.cpp;
reduce/device_reduce_instance_threadwise_f16_f32_f16.cpp;
reduce/device_reduce_instance_threadwise_f32_f32_f32.cpp;
reduce/device_reduce_instance_threadwise_f32_f64_f32.cpp;
reduce/device_reduce_instance_threadwise_f64_f64_f64.cpp;
reduce/device_reduce_instance_blockwise_second_call_f16_f16_f16.cpp;
reduce/device_reduce_instance_blockwise_second_call_f32_f32_f16.cpp;
reduce/device_reduce_instance_blockwise_second_call_f32_f32_f32.cpp;
reduce/device_reduce_instance_blockwise_second_call_f64_f64_f32.cpp;
reduce/device_reduce_instance_blockwise_second_call_f64_f64_f64.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f16_f16_f16.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f16_f32_f16.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f32_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f32_f64_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f64_f64_f64.cpp;
)
add_library(device_gemm_instance SHARED ${DEVICE_GEMM_INSTANCE_SOURCE})
add_library(device_gemm_bias2d_instance SHARED ${DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE})
add_library(device_gemm_bias_relu_instance SHARED ${DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE})
add_library(device_gemm_bias_relu_add_instance SHARED ${DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE})
add_library(device_batched_gemm_instance SHARED ${DEVICE_BATCHED_GEMM_INSTANCE_SOURCE})
add_library(device_conv1d_fwd_instance SHARED ${DEVICE_CONV1D_FWD_INSTANCE_SOURCE})
add_library(device_conv2d_fwd_instance SHARED ${DEVICE_CONV2D_FWD_INSTANCE_SOURCE})
add_library(device_conv2d_fwd_bias_relu_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE})
add_library(device_conv2d_fwd_bias_relu_add_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE})
add_library(device_conv2d_fwd_bias_relu_atomic_add_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE})
add_library(device_conv2d_bwd_data_instance SHARED ${DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE})
add_library(device_reduce_instance SHARED ${DEVICE_REDUCE_INSTANCE_SOURCE})
target_compile_features(device_gemm_instance PUBLIC)
target_compile_features(device_gemm_bias2d_instance PUBLIC)
target_compile_features(device_gemm_bias_relu_instance PUBLIC)
target_compile_features(device_gemm_bias_relu_add_instance PUBLIC)
target_compile_features(device_batched_gemm_instance PUBLIC)
target_compile_features(device_conv1d_fwd_instance PUBLIC)
target_compile_features(device_conv2d_fwd_instance PUBLIC)
target_compile_features(device_conv2d_fwd_bias_relu_instance PUBLIC)
target_compile_features(device_conv2d_fwd_bias_relu_add_instance PUBLIC)
target_compile_features(device_conv2d_fwd_bias_relu_atomic_add_instance PUBLIC)
target_compile_features(device_conv2d_bwd_data_instance PUBLIC)
target_compile_features(device_reduce_instance PUBLIC)
set_target_properties(device_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_gemm_bias2d_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_gemm_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_gemm_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv2d_fwd_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv2d_fwd_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv2d_fwd_bias_relu_atomic_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_gemm_instance LIBRARY DESTINATION lib)
install(TARGETS device_gemm_bias2d_instance LIBRARY DESTINATION lib)
install(TARGETS device_gemm_bias_relu_instance LIBRARY DESTINATION lib)
install(TARGETS device_gemm_bias_relu_add_instance LIBRARY DESTINATION lib)
install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv2d_fwd_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv2d_fwd_bias_relu_add_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINATION lib)
install(TARGETS device_conv2d_bwd_data_instance LIBRARY DESTINATION lib)
install(TARGETS device_reduce_instance LIBRARY DESTINATION lib)
add_subdirectory(gemm)
add_subdirectory(gemm_bias2d)
add_subdirectory(gemm_bias_relu)
add_subdirectory(gemm_bias_relu_add)
add_subdirectory(batched_gemm)
add_subdirectory(conv1d_fwd)
add_subdirectory(conv2d_fwd)
add_subdirectory(conv2d_fwd_bias_relu)
add_subdirectory(conv2d_fwd_bias_relu_add)
add_subdirectory(conv2d_fwd_bias_relu_atomic_add)
add_subdirectory(conv2d_bwd_data)
add_subdirectory(reduce)
#device_batched_gemm_instance
set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp;
device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp;
device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp;
device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp;
)
add_library(device_batched_gemm_instance SHARED ${DEVICE_BATCHED_GEMM_INSTANCE_SOURCE})
target_compile_features(device_batched_gemm_instance PUBLIC)
set_target_properties(device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_batched_gemm_instance)
# device_conv1d_fwd_instance
set(DEVICE_CONV1D_FWD_INSTANCE_SOURCE
device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instance.cpp;
)
add_library(device_conv1d_fwd_instance SHARED ${DEVICE_CONV1D_FWD_INSTANCE_SOURCE})
target_compile_features(device_conv1d_fwd_instance PUBLIC)
set_target_properties(device_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv1d_fwd_instance)
# device_conv2d_bwd_data_instance
set(DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
)
add_library(device_conv2d_bwd_data_instance SHARED ${DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE})
target_compile_features(device_conv2d_bwd_data_instance PUBLIC)
set_target_properties(device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv2d_bwd_data_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv2d_bwd_data_instance)
# device_conv2d_fwd_instance
set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library(device_conv2d_fwd_instance SHARED ${DEVICE_CONV2D_FWD_INSTANCE_SOURCE})
target_compile_features(device_conv2d_fwd_instance PUBLIC)
set_target_properties(device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv2d_fwd_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv2d_fwd_instance)
# device_conv2d_fwd_bias_relu_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library(device_conv2d_fwd_bias_relu_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE})
target_compile_features(device_conv2d_fwd_bias_relu_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv2d_fwd_bias_relu_instance)
# device_conv2d_fwd_bias_relu_add_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library(device_conv2d_fwd_bias_relu_add_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE})
target_compile_features(device_conv2d_fwd_bias_relu_add_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv2d_fwd_bias_relu_add_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv2d_fwd_bias_relu_add_instance)
# device_conv2d_fwd_bias_relu_atomic_add_instance
set(DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library(device_conv2d_fwd_bias_relu_atomic_add_instance SHARED ${DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE})
target_compile_features(device_conv2d_fwd_bias_relu_atomic_add_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_atomic_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_conv2d_fwd_bias_relu_atomic_add_instance)
# device_gemm_instance
set(DEVICE_GEMM_INSTANCE_SOURCE
device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp;
device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp;
device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp;
device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp;
device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_int8_int8_int8_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp;
)
add_library(device_gemm_instance SHARED ${DEVICE_GEMM_INSTANCE_SOURCE})
target_compile_features(device_gemm_instance PUBLIC)
set_target_properties(device_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_gemm_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_gemm_instance)
# device_gemm_bias2d_instance
set(DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE
device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_nk_mn_instance.cpp;
)
add_library(device_gemm_bias2d_instance SHARED ${DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE})
target_compile_features(device_gemm_bias2d_instance PUBLIC)
set_target_properties(device_gemm_bias2d_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_gemm_bias2d_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_gemm_bias2d_instance)
# device_gemm_bias_relu_instance
set(DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE
device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_nk_mn_instance.cpp;
)
add_library(device_gemm_bias_relu_instance SHARED ${DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE})
target_compile_features(device_gemm_bias_relu_instance PUBLIC)
set_target_properties(device_gemm_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_gemm_bias_relu_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_gemm_bias_relu_instance)
# device_gemm_bias_relu_add_instance
set(DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE
device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_nk_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_kn_mn_instance.cpp;
device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_nk_mn_instance.cpp;
)
add_library(device_gemm_bias_relu_add_instance SHARED ${DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE})
target_compile_features(device_gemm_bias_relu_add_instance PUBLIC)
set_target_properties(device_gemm_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_gemm_bias_relu_add_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_gemm_bias_relu_add_instance)
# device_reduce_instance
set(DEVICE_REDUCE_INSTANCE_SOURCE
device_reduce_instance_blockwise_f16_f16_f16.cpp;
device_reduce_instance_blockwise_f16_f32_f16.cpp;
device_reduce_instance_blockwise_f32_f32_f32.cpp;
device_reduce_instance_blockwise_f32_f64_f32.cpp;
device_reduce_instance_blockwise_f64_f64_f64.cpp;
device_reduce_instance_threadwise_f16_f16_f16.cpp;
device_reduce_instance_threadwise_f16_f32_f16.cpp;
device_reduce_instance_threadwise_f32_f32_f32.cpp;
device_reduce_instance_threadwise_f32_f64_f32.cpp;
device_reduce_instance_threadwise_f64_f64_f64.cpp;
device_reduce_instance_blockwise_second_call_f16_f16_f16.cpp;
device_reduce_instance_blockwise_second_call_f32_f32_f16.cpp;
device_reduce_instance_blockwise_second_call_f32_f32_f32.cpp;
device_reduce_instance_blockwise_second_call_f64_f64_f32.cpp;
device_reduce_instance_blockwise_second_call_f64_f64_f64.cpp;
device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp;
device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp;
device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp;
device_reduce_instance_multiblock_partial_reduce_f16_f16_f16.cpp;
device_reduce_instance_multiblock_partial_reduce_f16_f32_f16.cpp;
device_reduce_instance_multiblock_partial_reduce_f32_f32_f32.cpp;
device_reduce_instance_multiblock_partial_reduce_f32_f64_f32.cpp;
device_reduce_instance_multiblock_partial_reduce_f64_f64_f64.cpp;
)
add_library(device_reduce_instance SHARED ${DEVICE_REDUCE_INSTANCE_SOURCE})
target_compile_features(device_reduce_instance PUBLIC)
set_target_properties(device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS device_reduce_instance LIBRARY DESTINATION lib)
clang_tidy_check(device_reduce_instance)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment