Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5ab94474
Commit
5ab94474
authored
Mar 08, 2022
by
Chao Liu
Browse files
update cmake
parent
bca7b750
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
222 additions
and
235 deletions
+222
-235
CMakeLists.txt
CMakeLists.txt
+8
-16
library/CMakeLists.txt
library/CMakeLists.txt
+0
-39
library/src/host_tensor/CMakeLists.txt
library/src/host_tensor/CMakeLists.txt
+5
-3
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+12
-177
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
...tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
+14
-0
library/src/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
+11
-0
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
+14
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
+14
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
...peration_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
+10
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
...tion_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
+10
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
...stance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
+11
-0
library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt
...ary/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt
+34
-0
library/src/tensor_operation_instance/gpu/gemm_bias2d/CMakeLists.txt
.../tensor_operation_instance/gpu/gemm_bias2d/CMakeLists.txt
+18
-0
library/src/tensor_operation_instance/gpu/gemm_bias_relu/CMakeLists.txt
...nsor_operation_instance/gpu/gemm_bias_relu/CMakeLists.txt
+14
-0
library/src/tensor_operation_instance/gpu/gemm_bias_relu_add/CMakeLists.txt
..._operation_instance/gpu/gemm_bias_relu_add/CMakeLists.txt
+14
-0
library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt
...y/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt
+33
-0
No files found.
CMakeLists.txt
View file @
5ab94474
...
@@ -71,11 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
...
@@ -71,11 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif
()
endif
()
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
### half
##find_path(HALF_INCLUDE_DIR half.hpp)
#set(HALF_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external/half/include")
#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
rocm_create_package
(
rocm_create_package
(
NAME CK-
${
CK_BACKEND
}
NAME CK-
${
CK_BACKEND
}
...
@@ -229,21 +224,18 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
...
@@ -229,21 +224,18 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
#file(GLOB_RECURSE COMPOSABLE_KERNEL_HEADERS "composable_kernel/include/*/*.hpp")
#file(GLOB_RECURSE DEVICE_OPS_HEADERS "device_operation/include/*.hpp")
#
#file(GLOB_RECURSE DEVICE_OPS_SOURCE "device_operation/*.cpp")
#
#set(CK_HEADERS ${COMPOSABLE_KERNEL_HEADERS} ${DEVICE_OPS_HEADERS})
#set(CK_SOURCE ${DEVICE_OPS_SOURCE})
#add_library(composable_kernel ${CK_SOURCE})
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/hip_version.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/hip_version.hpp"
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/hip_version.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/hip_version.hpp"
)
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_BINARY_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/library/include
)
SET
(
BUILD_DEV ON CACHE BOOL
"BUILD_DEV"
)
SET
(
BUILD_DEV ON CACHE BOOL
"BUILD_DEV"
)
if
(
BUILD_DEV
)
if
(
BUILD_DEV
)
target
_compile_options
(
composable_kernel PRIVATE
-Werror
)
add
_compile_options
(
-Werror
)
target
_compile_options
(
composable_kernel PRIVATE
-Weverything
)
add
_compile_options
(
-Weverything
)
endif
()
endif
()
message
(
"CMAKE_CXX_FLAGS:
${
CMAKE_CXX_FLAGS
}
"
)
message
(
"CMAKE_CXX_FLAGS:
${
CMAKE_CXX_FLAGS
}
"
)
...
...
library/CMakeLists.txt
View file @
5ab94474
#file(GLOB_RECURSE COMPOSABLE_KERNEL_HEADERS "composable_kernel/include/*/*.hpp")
#file(GLOB_RECURSE DEVICE_OPS_HEADERS "device_operation/include/*.hpp")
#
#file(GLOB_RECURSE DEVICE_OPS_SOURCE "device_operation/*.cpp")
#
#set(CK_HEADERS ${COMPOSABLE_KERNEL_HEADERS} ${DEVICE_OPS_HEADERS})
#set(CK_SOURCE ${DEVICE_OPS_SOURCE})
#add_library(composable_kernel ${CK_SOURCE})
add_subdirectory
(
src/host_tensor
)
add_subdirectory
(
src/host_tensor
)
add_subdirectory
(
src/tensor_operation_instance/gpu
)
add_subdirectory
(
src/tensor_operation_instance/gpu
)
## composable_kernel
file
(
GLOB_RECURSE CK_LIBRARY_SOURCE
"src/*/*.cpp"
)
set
(
CK_SOURCE
${
CK_LIBRARY_SOURCE
}
)
add_library
(
composable_kernel
${
CK_SOURCE
}
)
target_include_directories
(
composable_kernel PUBLIC
$<BUILD_INTERFACE:
${
PROJECT_SOURCE_DIR
}
/include>
)
target_include_directories
(
composable_kernel PUBLIC
$<BUILD_INTERFACE:
${
PROJECT_BINARY_DIR
}
/include>
)
target_include_directories
(
composable_kernel PUBLIC
$<BUILD_INTERFACE:
${
PROJECT_SOURCE_DIR
}
/library/include>
)
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/utility
include
)
set
(
HOST_TENSOR_SOURCE
src/host_tensor.cpp;
src/device.cpp;
)
clang_tidy_check
(
composable_kernel
)
library/src/host_tensor/CMakeLists.txt
View file @
5ab94474
...
@@ -5,10 +5,12 @@ include_directories(BEFORE
...
@@ -5,10 +5,12 @@ include_directories(BEFORE
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host_tensor
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host_tensor
)
)
file
(
GLOB_RECURSE HOST_TENSOR_SOURCE
"./*.cpp"
)
set
(
HOST_TENSOR_SOURCE
device.cpp
host_tensor.cpp
)
add_library
(
host_tensor SHARED
${
HOST_TENSOR_SOURCE
}
)
add_library
(
host_tensor SHARED
${
HOST_TENSOR_SOURCE
}
)
target_link_libraries
(
host_tensor PRIVATE hip::device
)
target_link_libraries
(
host_tensor INTERFACE hip::host
)
target_compile_features
(
host_tensor PUBLIC
)
target_compile_features
(
host_tensor PUBLIC
)
set_target_properties
(
host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS host_tensor LIBRARY DESTINATION lib
)
install
(
TARGETS host_tensor LIBRARY DESTINATION lib
)
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
5ab94474
...
@@ -16,180 +16,15 @@ include_directories(BEFORE
...
@@ -16,180 +16,15 @@ include_directories(BEFORE
${
PROJECT_SOURCE_DIR
}
/external/include/half
${
PROJECT_SOURCE_DIR
}
/external/include/half
)
)
# device_gemm_instance
add_subdirectory
(
gemm
)
set
(
DEVICE_GEMM_INSTANCE_SOURCE
add_subdirectory
(
gemm_bias2d
)
gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp;
add_subdirectory
(
gemm_bias_relu
)
gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp;
add_subdirectory
(
gemm_bias_relu_add
)
gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp;
add_subdirectory
(
batched_gemm
)
gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp;
add_subdirectory
(
conv1d_fwd
)
gemm/device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp;
add_subdirectory
(
conv2d_fwd
)
gemm/device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp;
add_subdirectory
(
conv2d_fwd_bias_relu
)
gemm/device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp;
add_subdirectory
(
conv2d_fwd_bias_relu_add
)
gemm/device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
add_subdirectory
(
conv2d_fwd_bias_relu_atomic_add
)
gemm/device_gemm_xdl_c_shuffle_int8_int8_int8_mk_nk_mn_instance.cpp;
add_subdirectory
(
conv2d_bwd_data
)
gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp;
add_subdirectory
(
reduce
)
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp;
gemm/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp;
)
# device_gemm_bias2d_instance
set
(
DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_nk_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias2d/device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_nk_mn_instance.cpp;
)
# device_gemm_bias_relu_instance
set
(
DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias_relu/device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_nk_mn_instance.cpp;
)
# device_gemm_bias_relu_add_instance
set
(
DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_kn_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_nk_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_kn_mn_instance.cpp;
gemm_bias_relu_add/device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_nk_mn_instance.cpp;
)
set
(
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp;
batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp;
)
# device_conv2d_fwd_instance
set
(
DEVICE_CONV2D_FWD_INSTANCE_SOURCE
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv1d_fwd_instance
set
(
DEVICE_CONV1D_FWD_INSTANCE_SOURCE
conv1d_fwd/device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instance.cpp;
)
# device_conv2d_fwd_bias_relu_instance
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_fwd_bias_relu_add_instance
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_fwd_bias_relu_atomic_add_instance
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
conv2d_fwd_bias_relu_atomic_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
# device_conv2d_bwd_data_instance
set
(
DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp;
conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
)
# device_reduce_instance
set
(
DEVICE_REDUCE_INSTANCE_SOURCE
reduce/device_reduce_instance_blockwise_f16_f16_f16.cpp;
reduce/device_reduce_instance_blockwise_f16_f32_f16.cpp;
reduce/device_reduce_instance_blockwise_f32_f32_f32.cpp;
reduce/device_reduce_instance_blockwise_f32_f64_f32.cpp;
reduce/device_reduce_instance_blockwise_f64_f64_f64.cpp;
reduce/device_reduce_instance_threadwise_f16_f16_f16.cpp;
reduce/device_reduce_instance_threadwise_f16_f32_f16.cpp;
reduce/device_reduce_instance_threadwise_f32_f32_f32.cpp;
reduce/device_reduce_instance_threadwise_f32_f64_f32.cpp;
reduce/device_reduce_instance_threadwise_f64_f64_f64.cpp;
reduce/device_reduce_instance_blockwise_second_call_f16_f16_f16.cpp;
reduce/device_reduce_instance_blockwise_second_call_f32_f32_f16.cpp;
reduce/device_reduce_instance_blockwise_second_call_f32_f32_f32.cpp;
reduce/device_reduce_instance_blockwise_second_call_f64_f64_f32.cpp;
reduce/device_reduce_instance_blockwise_second_call_f64_f64_f64.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f16_f16_f16.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f16_f32_f16.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f32_f32_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f32_f64_f32.cpp;
reduce/device_reduce_instance_multiblock_partial_reduce_f64_f64_f64.cpp;
)
add_library
(
device_gemm_instance SHARED
${
DEVICE_GEMM_INSTANCE_SOURCE
}
)
add_library
(
device_gemm_bias2d_instance SHARED
${
DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE
}
)
add_library
(
device_gemm_bias_relu_instance SHARED
${
DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE
}
)
add_library
(
device_gemm_bias_relu_add_instance SHARED
${
DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE
}
)
add_library
(
device_batched_gemm_instance SHARED
${
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
}
)
add_library
(
device_conv1d_fwd_instance SHARED
${
DEVICE_CONV1D_FWD_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_fwd_instance SHARED
${
DEVICE_CONV2D_FWD_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_fwd_bias_relu_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_fwd_bias_relu_add_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_fwd_bias_relu_atomic_add_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_bwd_data_instance SHARED
${
DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
}
)
add_library
(
device_reduce_instance SHARED
${
DEVICE_REDUCE_INSTANCE_SOURCE
}
)
target_compile_features
(
device_gemm_instance PUBLIC
)
target_compile_features
(
device_gemm_bias2d_instance PUBLIC
)
target_compile_features
(
device_gemm_bias_relu_instance PUBLIC
)
target_compile_features
(
device_gemm_bias_relu_add_instance PUBLIC
)
target_compile_features
(
device_batched_gemm_instance PUBLIC
)
target_compile_features
(
device_conv1d_fwd_instance PUBLIC
)
target_compile_features
(
device_conv2d_fwd_instance PUBLIC
)
target_compile_features
(
device_conv2d_fwd_bias_relu_instance PUBLIC
)
target_compile_features
(
device_conv2d_fwd_bias_relu_add_instance PUBLIC
)
target_compile_features
(
device_conv2d_fwd_bias_relu_atomic_add_instance PUBLIC
)
target_compile_features
(
device_conv2d_bwd_data_instance PUBLIC
)
target_compile_features
(
device_reduce_instance PUBLIC
)
set_target_properties
(
device_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_gemm_bias2d_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_gemm_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_gemm_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_fwd_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_fwd_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_fwd_bias_relu_atomic_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_gemm_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_gemm_bias2d_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_gemm_bias_relu_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_gemm_bias_relu_add_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_fwd_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_fwd_bias_relu_add_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_bwd_data_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_reduce_instance LIBRARY DESTINATION lib
)
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_batched_gemm_instance
# Source files (relative to this directory) compiled into the
# device_batched_gemm_instance shared library.
set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
    device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp
    device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp
    device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp
    device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp
)

add_library(device_batched_gemm_instance SHARED ${DEVICE_BATCHED_GEMM_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_batched_gemm_instance PUBLIC)
set_target_properties(device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_batched_gemm_instance)
library/src/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv1d_fwd_instance
# Source files (relative to this directory) compiled into the
# device_conv1d_fwd_instance shared library.
set(DEVICE_CONV1D_FWD_INSTANCE_SOURCE
    device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instance.cpp
)

add_library(device_conv1d_fwd_instance SHARED ${DEVICE_CONV1D_FWD_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv1d_fwd_instance PUBLIC)
set_target_properties(device_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv1d_fwd_instance)
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv2d_bwd_data_instance
# Source files (relative to this directory) compiled into the
# device_conv2d_bwd_data_instance shared library.
set(DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
    device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
    device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp
    device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp
    device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp
)

add_library(device_conv2d_bwd_data_instance SHARED ${DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv2d_bwd_data_instance PUBLIC)
set_target_properties(device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv2d_bwd_data_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv2d_bwd_data_instance)
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv2d_fwd_instance
# Source files (relative to this directory) compiled into the
# device_conv2d_fwd_instance shared library.
set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp
    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp
    device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp
    device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp
)

add_library(device_conv2d_fwd_instance SHARED ${DEVICE_CONV2D_FWD_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv2d_fwd_instance PUBLIC)
set_target_properties(device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv2d_fwd_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv2d_fwd_instance)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv2d_fwd_bias_relu_instance
# Source files (relative to this directory) compiled into the
# device_conv2d_fwd_bias_relu_instance shared library.
set(DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
    device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp
)

add_library(device_conv2d_fwd_bias_relu_instance SHARED
    ${DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE}
)

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv2d_fwd_bias_relu_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_instance
    PROPERTIES POSITION_INDEPENDENT_CODE ON
)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv2d_fwd_bias_relu_instance)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv2d_fwd_bias_relu_add_instance
# Source files (relative to this directory) compiled into the
# device_conv2d_fwd_bias_relu_add_instance shared library.
set(DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
    device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp
)

add_library(device_conv2d_fwd_bias_relu_add_instance SHARED
    ${DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE}
)

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv2d_fwd_bias_relu_add_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_add_instance
    PROPERTIES POSITION_INDEPENDENT_CODE ON
)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv2d_fwd_bias_relu_add_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv2d_fwd_bias_relu_add_instance)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_conv2d_fwd_bias_relu_atomic_add_instance
# Source files (relative to this directory) compiled into the
# device_conv2d_fwd_bias_relu_atomic_add_instance shared library.
set(DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
    device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instance.cpp
)

add_library(device_conv2d_fwd_bias_relu_atomic_add_instance SHARED
    ${DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE}
)

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_conv2d_fwd_bias_relu_atomic_add_instance PUBLIC)
set_target_properties(device_conv2d_fwd_bias_relu_atomic_add_instance
    PROPERTIES POSITION_INDEPENDENT_CODE ON
)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_conv2d_fwd_bias_relu_atomic_add_instance)
library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_gemm_instance
# Source files (relative to this directory) compiled into the
# device_gemm_instance shared library.
set(DEVICE_GEMM_INSTANCE_SOURCE
    device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp
    device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp
    device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp
    device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp
    device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_int8_int8_int8_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp
    device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp
    device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp
    device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp
    device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp
)

add_library(device_gemm_instance SHARED ${DEVICE_GEMM_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_gemm_instance PUBLIC)
set_target_properties(device_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_gemm_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_gemm_instance)
library/src/tensor_operation_instance/gpu/gemm_bias2d/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_gemm_bias2d_instance
# Source files (relative to this directory) compiled into the
# device_gemm_bias2d_instance shared library.
set(DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE
    device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_km_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f32_f32_f32_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_km_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_2d_f16_f16_f16_mk_nk_mn_instance.cpp
)

add_library(device_gemm_bias2d_instance SHARED ${DEVICE_GEMM_BIAS2D_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_gemm_bias2d_instance PUBLIC)
set_target_properties(device_gemm_bias2d_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_gemm_bias2d_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_gemm_bias2d_instance)
library/src/tensor_operation_instance/gpu/gemm_bias_relu/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_gemm_bias_relu_instance
# Source files (relative to this directory) compiled into the
# device_gemm_bias_relu_instance shared library.
set(DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE
    device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_f16_f16_f16_km_nk_mn_instance.cpp
)

add_library(device_gemm_bias_relu_instance SHARED ${DEVICE_GEMM_BIAS_RELU_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_gemm_bias_relu_instance PUBLIC)
set_target_properties(device_gemm_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_gemm_bias_relu_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_gemm_bias_relu_instance)
library/src/tensor_operation_instance/gpu/gemm_bias_relu_add/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_gemm_bias_relu_add_instance
# Source files (relative to this directory) compiled into the
# device_gemm_bias_relu_add_instance shared library.
set(DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE
    device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_mk_nk_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_kn_mn_instance.cpp
    device_gemm_xdl_c_shuffle_bias_relu_add_f16_f16_f16_km_nk_mn_instance.cpp
)

add_library(device_gemm_bias_relu_add_instance SHARED
    ${DEVICE_GEMM_BIAS_RELU_ADD_INSTANCE_SOURCE}
)

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_gemm_bias_relu_add_instance PUBLIC)
set_target_properties(device_gemm_bias_relu_add_instance
    PROPERTIES POSITION_INDEPENDENT_CODE ON
)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_gemm_bias_relu_add_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_gemm_bias_relu_add_instance)
library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt
0 → 100644
View file @
5ab94474
# device_reduce_instance
# Source files (relative to this directory) compiled into the
# device_reduce_instance shared library.
set(DEVICE_REDUCE_INSTANCE_SOURCE
    device_reduce_instance_blockwise_f16_f16_f16.cpp
    device_reduce_instance_blockwise_f16_f32_f16.cpp
    device_reduce_instance_blockwise_f32_f32_f32.cpp
    device_reduce_instance_blockwise_f32_f64_f32.cpp
    device_reduce_instance_blockwise_f64_f64_f64.cpp
    device_reduce_instance_threadwise_f16_f16_f16.cpp
    device_reduce_instance_threadwise_f16_f32_f16.cpp
    device_reduce_instance_threadwise_f32_f32_f32.cpp
    device_reduce_instance_threadwise_f32_f64_f32.cpp
    device_reduce_instance_threadwise_f64_f64_f64.cpp
    device_reduce_instance_blockwise_second_call_f16_f16_f16.cpp
    device_reduce_instance_blockwise_second_call_f32_f32_f16.cpp
    device_reduce_instance_blockwise_second_call_f32_f32_f32.cpp
    device_reduce_instance_blockwise_second_call_f64_f64_f32.cpp
    device_reduce_instance_blockwise_second_call_f64_f64_f64.cpp
    device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp
    device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp
    device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp
    device_reduce_instance_multiblock_partial_reduce_f16_f16_f16.cpp
    device_reduce_instance_multiblock_partial_reduce_f16_f32_f16.cpp
    device_reduce_instance_multiblock_partial_reduce_f32_f32_f32.cpp
    device_reduce_instance_multiblock_partial_reduce_f32_f64_f32.cpp
    device_reduce_instance_multiblock_partial_reduce_f64_f64_f64.cpp
)

add_library(device_reduce_instance SHARED ${DEVICE_REDUCE_INSTANCE_SOURCE})

# NOTE(review): no feature names are passed, so this call appears to add no
# compile-feature requirements -- confirm whether a cxx_std_NN was intended.
target_compile_features(device_reduce_instance PUBLIC)
set_target_properties(device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Only the LIBRARY artifact is given an install destination.
install(TARGETS device_reduce_instance LIBRARY DESTINATION lib)

# clang_tidy_check() is not defined in this file; it must be provided by an
# enclosing scope before this directory is processed.
clang_tidy_check(device_reduce_instance)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment