Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c508a7c9
Commit
c508a7c9
authored
Dec 04, 2023
by
Artur Wojcik
Browse files
temp
parent
bc4bf9bd
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
277 additions
and
160 deletions
+277
-160
.gitignore
.gitignore
+9
-0
CMakeLists.txt
CMakeLists.txt
+19
-13
client_example/01_gemm/CMakeLists.txt
client_example/01_gemm/CMakeLists.txt
+2
-0
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
+6
-0
client_example/03_gemm_layernorm/CMakeLists.txt
client_example/03_gemm_layernorm/CMakeLists.txt
+2
-0
client_example/04_contraction/CMakeLists.txt
client_example/04_contraction/CMakeLists.txt
+5
-1
client_example/05_layernorm/CMakeLists.txt
client_example/05_layernorm/CMakeLists.txt
+3
-0
client_example/06_softmax/CMakeLists.txt
client_example/06_softmax/CMakeLists.txt
+1
-0
client_example/08_fused_attention/CMakeLists.txt
client_example/08_fused_attention/CMakeLists.txt
+2
-0
client_example/09_quantization/CMakeLists.txt
client_example/09_quantization/CMakeLists.txt
+7
-0
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
+6
-0
client_example/12_elementwise_normalization/CMakeLists.txt
client_example/12_elementwise_normalization/CMakeLists.txt
+1
-0
client_example/13_batchnorm/CMakeLists.txt
client_example/13_batchnorm/CMakeLists.txt
+3
-0
client_example/14_instance_id/CMakeLists.txt
client_example/14_instance_id/CMakeLists.txt
+1
-0
client_example/CMakeLists.txt
client_example/CMakeLists.txt
+56
-10
cmake/EnableCompilerWarnings.cmake
cmake/EnableCompilerWarnings.cmake
+79
-85
cmake/getopt.cmake
cmake/getopt.cmake
+28
-0
cmake/googletest.cmake
cmake/googletest.cmake
+0
-50
cmake/gtest.cmake
cmake/gtest.cmake
+46
-0
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
+1
-1
No files found.
.gitignore
View file @
c508a7c9
...
...
@@ -56,3 +56,12 @@ _templates/
_toc.yml
docBin/
_doxygen/
# JetBrains IDE
.idea/
cmake-build*/
build*/
# Python virtualenv
.venv/
CMakeLists.txt
View file @
c508a7c9
...
...
@@ -4,22 +4,27 @@ if(POLICY CMP0140)
cmake_policy
(
SET CMP0140 NEW
)
endif
()
get_property
(
_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG
)
# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if
(
NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE
)
set
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
)
if
(
_GENERATOR_IS_MULTI_CONFIG
)
set
(
CMAKE_CONFIGURATION_TYPES
"Debug;Release;RelWithDebInfo;MinSizeRel"
CACHE STRING
"Available build types (configurations) on multi-config generators"
)
else
()
set
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
)
endif
()
# Default installation path
if
(
WIN32
)
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm/x86_64-w64-mingw32"
CACHE PATH
""
)
else
()
if
(
NOT WIN32
)
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm"
CACHE PATH
""
)
endif
()
set
(
version 1.1.0
)
# Check support for CUDA/HIP in Cmake
project
(
composable_kernel VERSION
${
version
}
)
project
(
composable_kernel VERSION
${
version
}
LANGUAGES CXX
)
include
(
CTest
)
list
(
APPEND CMAKE_MODULE_PATH
"
${
PROJECT_SOURCE_DIR
}
/cmake"
)
...
...
@@ -72,15 +77,15 @@ if(INSTANCES_ONLY)
set
(
CK_ENABLE_INSTANCES_ONLY
"ON"
)
endif
()
include
(
getopt
)
# CK config file to record supported datatypes, etc.
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/
include/ck/config.h.in
"
"
${
PROJEC
T_BINARY_DIR
}
/include/ck/config.h
"
)
configure_file
(
include/ck/config.h.in
${
CMAKE_CURREN
T_BINARY_DIR
}
/include/ck/config.h
)
# CK version file to record release version as well as git commit hash
find_package
(
Git REQUIRED
)
execute_process
(
COMMAND
"
${
GIT_EXECUTABLE
}
"
rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/version.h.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/version.h"
)
enable_testing
()
configure_file
(
include/ck/version.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/version.h
)
set
(
ROCM_SYMLINK_LIBS OFF
)
find_package
(
ROCM REQUIRED PATHS /opt/rocm
)
...
...
@@ -96,7 +101,7 @@ include(TargetFlags)
rocm_setup_version
(
VERSION
${
version
}
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
"$ENV{ROCM_PATH}"
"$ENV{HIP_PATH}"
)
message
(
"GPU_TARGETS=
${
GPU_TARGETS
}
"
)
...
...
@@ -167,7 +172,6 @@ find_package(Threads REQUIRED)
link_libraries
(
Threads::Threads
)
## C++
enable_language
(
CXX
)
set
(
CMAKE_CXX_STANDARD 17
)
set
(
CMAKE_CXX_STANDARD_REQUIRED ON
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
...
...
@@ -435,7 +439,9 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples
)
add_subdirectory
(
example
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
...
...
client_example/01_gemm/CMakeLists.txt
View file @
c508a7c9
# Client example: builds gemm.cpp into the client_gemm executable.
add_executable(client_gemm
    gemm.cpp
)

# Link the composable_kernel device-operation libraries this example uses.
target_link_libraries(client_gemm
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_gemm_operations
)

# Require C++17 on the target itself.
target_compile_features(client_gemm
    PRIVATE
        cxx_std_17
)
client_example/02_gemm_add_add_fastgelu/CMakeLists.txt
View file @
c508a7c9
...
...
@@ -2,12 +2,15 @@ add_custom_target(client_gemm_fastgelu_examples)
# Each fastgelu example is built from <name>.cpp into client_<name>, linked
# against the GEMM device operations, compiled as C++17, and attached to the
# client_gemm_fastgelu_examples umbrella target.
foreach(fastgelu_example IN ITEMS
        gemm_add_add_fastgelu
        gemm_add_fastgelu
        gemm_fastgelu)
    add_executable(client_${fastgelu_example} ${fastgelu_example}.cpp)
    target_link_libraries(client_${fastgelu_example}
        PRIVATE
            composable_kernel::device_gemm_operations
    )
    target_compile_features(client_${fastgelu_example} PRIVATE cxx_std_17)
    add_dependencies(client_gemm_fastgelu_examples client_${fastgelu_example})
endforeach()
...
...
@@ -16,12 +19,15 @@ add_custom_target(client_gemm_fastgelu_generic_examples)
# Generic fastgelu client examples. Every executable links the GEMM device
# operations and is compiled as C++17.

add_executable(client_gemm_add_add_fastgelu_generic gemm_add_add_fastgelu_generic.cpp)
# Use the keyword signature (PRIVATE) like the sibling targets; the plain
# signature is legacy and cannot be mixed with keyword calls on one target.
target_link_libraries(client_gemm_add_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations)
target_compile_features(client_gemm_add_add_fastgelu_generic PRIVATE cxx_std_17)

add_executable(client_gemm_add_fastgelu_generic gemm_add_fastgelu_generic.cpp)
target_link_libraries(client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations)
target_compile_features(client_gemm_add_fastgelu_generic PRIVATE cxx_std_17)

add_executable(client_gemm_fastgelu_generic gemm_fastgelu_generic.cpp)
target_link_libraries(client_gemm_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations)
target_compile_features(client_gemm_fastgelu_generic PRIVATE cxx_std_17)

# Building the umbrella target builds all three generic examples.
add_dependencies(client_gemm_fastgelu_generic_examples
    client_gemm_add_add_fastgelu_generic
    client_gemm_add_fastgelu_generic
    client_gemm_fastgelu_generic
)
client_example/03_gemm_layernorm/CMakeLists.txt
View file @
c508a7c9
# Client example: GEMM + add + add + layernorm (naive variant).
add_executable(client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp)
target_link_libraries(client_gemm_add_add_layernorm_naive
    PRIVATE
        composable_kernel::device_gemm_operations
        composable_kernel::device_other_operations
)
# The compile feature must be set on the executable declared above;
# target_compile_features() fails at configure time for an unknown target.
target_compile_features(client_gemm_add_add_layernorm_naive PRIVATE cxx_std_17)
# Client example: GEMM + add + ReLU + add + layernorm (Welford variant).
add_executable(client_gemm_add_relu_add_layernorm_welford
    gemm_add_relu_add_layernorm_welford.cpp
)
target_link_libraries(client_gemm_add_relu_add_layernorm_welford
    PRIVATE
        composable_kernel::device_gemm_operations
        composable_kernel::device_other_operations
)
# Require C++17 on the target itself.
target_compile_features(client_gemm_add_relu_add_layernorm_welford
    PRIVATE
        cxx_std_17
)
client_example/04_contraction/CMakeLists.txt
View file @
c508a7c9
# Client example: contraction with scale, fp32.
add_executable(client_contraction_scale_fp32 contraction_scale_fp32.cpp)
target_link_libraries(client_contraction_scale_fp32
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_contraction_operations
        composable_kernel::device_gemm_operations
)
# Target name must match the executable above (including the _fp32 suffix),
# otherwise configuration fails on an unknown target.
target_compile_features(client_contraction_scale_fp32 PRIVATE cxx_std_17)
# Client example: bilinear contraction, fp32.
add_executable(client_contraction_bilinear_fp32 contraction_bilinear_fp32.cpp)
target_link_libraries(client_contraction_bilinear_fp32
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_contraction_operations
        composable_kernel::device_gemm_operations
)
# Target name must match the executable above (including the _fp32 suffix),
# otherwise configuration fails on an unknown target.
target_compile_features(client_contraction_bilinear_fp32 PRIVATE cxx_std_17)
# Client example: contraction with scale, fp64.
add_executable(client_contraction_scale_fp64
    contraction_scale_fp64.cpp
)
target_link_libraries(client_contraction_scale_fp64
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_contraction_operations
        composable_kernel::device_gemm_operations
)
# Require C++17 on the target itself.
target_compile_features(client_contraction_scale_fp64
    PRIVATE
        cxx_std_17
)
# Client example: bilinear contraction, fp64.
add_executable(client_contraction_bilinear_fp64 contraction_bilinear_fp64.cpp)
target_link_libraries(client_contraction_bilinear_fp64
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_contraction_operations
        composable_kernel::device_gemm_operations
)
# Target name must match the executable above ("bilinear", not "blinear"),
# otherwise configuration fails on an unknown target.
target_compile_features(client_contraction_bilinear_fp64 PRIVATE cxx_std_17)
# Client example: G1 M2 N3 K1 contraction with add, XDL, fp16.
add_executable(contraction_g1m2n3k1_add_xdl_fp16 contraction_g1m2n3k1_add_xdl_fp16.cpp)
target_link_libraries(contraction_g1m2n3k1_add_xdl_fp16
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_contraction_operations
        composable_kernel::device_gemm_operations
)
# Target name must match the executable above ("xdl_fp16", with an underscore),
# otherwise configuration fails on an unknown target.
target_compile_features(contraction_g1m2n3k1_add_xdl_fp16 PRIVATE cxx_std_17)
client_example/05_layernorm/CMakeLists.txt
View file @
c508a7c9
# Forward layernorm client examples for the 2d and 4d variants. Each one is
# built from layernorm<rank>_fwd.cpp, links the "other" device operations and
# is compiled as C++17.
foreach(layernorm_rank IN ITEMS 2d 4d)
    set(layernorm_example layernorm${layernorm_rank}_fwd)
    add_executable(client_${layernorm_example} ${layernorm_example}.cpp)
    target_link_libraries(client_${layernorm_example}
        PRIVATE
            composable_kernel::device_other_operations
    )
    target_compile_features(client_${layernorm_example} PRIVATE cxx_std_17)
endforeach()
unset(layernorm_example)
client_example/06_softmax/CMakeLists.txt
View file @
c508a7c9
# Client example: 4d softmax.
add_executable(client_softmax4d
    softmax4d.cpp
)
target_link_libraries(client_softmax4d
    PRIVATE
        composable_kernel::device_other_operations
        composable_kernel::device_reduction_operations
)
# Require C++17 on the target itself.
target_compile_features(client_softmax4d
    PRIVATE
        cxx_std_17
)
client_example/08_fused_attention/CMakeLists.txt
View file @
c508a7c9
# Fused-attention client examples (plain and with bias). Each one is built
# from <name>.cpp into client_<name>, links the "other" and GEMM device
# operations, and is compiled as C++17.
foreach(attention_example IN ITEMS fused_attention fused_attention_bias)
    add_executable(client_${attention_example} ${attention_example}.cpp)
    target_link_libraries(client_${attention_example}
        PRIVATE
            composable_kernel::device_other_operations
            composable_kernel::device_gemm_operations
    )
    target_compile_features(client_${attention_example} PRIVATE cxx_std_17)
endforeach()
client_example/09_quantization/CMakeLists.txt
View file @
c508a7c9
# Quantization client examples. They are only added when DTYPES is not
# defined or when it matches "int8".
if(DTYPES MATCHES "int8" OR NOT DEFINED DTYPES)
    # Every example follows the same recipe: <name>.cpp -> client_<name>,
    # linked against the conv, "other" and GEMM device operations, compiled as
    # C++17. Deriving the target name from a single list entry keeps the three
    # commands in sync (a hand-typed copy previously misspelled one target
    # name in target_compile_features(), which breaks configuration).
    foreach(quantization_example IN ITEMS
            conv2d_fwd_bias_tanh_perchannel_quantization
            conv2d_fwd_bias_relu_perchannel_quantization
            conv2d_fwd_bias_tanh_perlayer_quantization
            conv2d_fwd_bias_relu_perlayer_quantization
            conv2d_fwd_perchannel_quantization
            conv2d_fwd_perlayer_quantization
            gemm_quantization)
        add_executable(client_${quantization_example} ${quantization_example}.cpp)
        target_link_libraries(client_${quantization_example}
            PRIVATE
                composable_kernel::device_conv_operations
                composable_kernel::device_other_operations
                composable_kernel::device_gemm_operations
        )
        target_compile_features(client_${quantization_example} PRIVATE cxx_std_17)
    endforeach()
endif()
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
View file @
c508a7c9
...
...
@@ -9,3 +9,9 @@ target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_k
# Link the 3d grouped-conv backward-weight examples against the conv device
# operations.
foreach(conv3d_variant IN ITEMS fp16 fp32 fp16_comp_bf8_fp8)
    target_link_libraries(client_grouped_conv3d_bwd_weight_${conv3d_variant}
        PRIVATE
            composable_kernel::device_conv_operations
    )
endforeach()

# Require C++17 on each grouped-conv backward-weight example target.
foreach(bwd_weight_target IN ITEMS
        client_grouped_conv1d_bwd_weight_fp16
        client_grouped_conv2d_bwd_weight_fp16
        client_grouped_conv3d_bwd_weight_fp16
        client_grouped_conv3d_bwd_weight_fp32
        client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8)
    target_compile_features(${bwd_weight_target} PRIVATE cxx_std_17)
endforeach()
client_example/12_elementwise_normalization/CMakeLists.txt
View file @
c508a7c9
# Client example: elementwise op followed by 2d layernorm.
add_executable(client_elementwise_layernorm2d
    elementwise_layernorm2d.cpp
)
target_link_libraries(client_elementwise_layernorm2d
    PRIVATE
        composable_kernel::device_other_operations
)
# Require C++17 on the target itself.
target_compile_features(client_elementwise_layernorm2d
    PRIVATE
        cxx_std_17
)
client_example/13_batchnorm/CMakeLists.txt
View file @
c508a7c9
...
...
@@ -4,3 +4,6 @@ add_executable(client_batchnorm_infer_nhwc batchnorm_infer_nhwc.cpp)
# For the fwd, bwd and infer NHWC batchnorm examples: link the "other" device
# operations and require C++17.
foreach(batchnorm_pass IN ITEMS fwd bwd infer)
    target_link_libraries(client_batchnorm_${batchnorm_pass}_nhwc
        PRIVATE
            composable_kernel::device_other_operations
    )
    target_compile_features(client_batchnorm_${batchnorm_pass}_nhwc
        PRIVATE
            cxx_std_17
    )
endforeach()
client_example/14_instance_id/CMakeLists.txt
View file @
c508a7c9
# Client example: batchnorm forward, instance-id variant.
add_executable(client_batchnorm_fwd_instance_id
    batchnorm_fwd_instance_id.cpp
)
target_link_libraries(client_batchnorm_fwd_instance_id
    PRIVATE
        composable_kernel::device_other_operations
)
# Require C++17 on the target itself.
target_compile_features(client_batchnorm_fwd_instance_id
    PRIVATE
        cxx_std_17
)
client_example/CMakeLists.txt
View file @
c508a7c9
cmake_minimum_required
(
VERSION 3.15
)
project
(
ck_app
)
add_compile_options
(
-std=c++17
)
project
(
ck_app LANGUAGES CXX
)
if
(
DTYPES
)
add_definitions
(
-DDTYPES
)
...
...
@@ -49,13 +48,60 @@ else()
endif
()
find_package
(
composable_kernel COMPONENTS device_other_operations device_gemm_operations device_conv_operations device_contraction_operations device_reduction_operations
)
find_package
(
hip REQUIRED PATHS /opt/rocm
)
find_package
(
hip REQUIRED PATHS /opt/rocm
$ENV{HIP_PATH}
)
message
(
STATUS
"Build with HIP
${
hip_VERSION
}
"
)
# add all example subdir
file
(
GLOB dir_list LIST_DIRECTORIES true *
)
FOREACH
(
subdir
${
dir_list
}
)
IF
(
IS_DIRECTORY
"
${
subdir
}
"
AND
(
NOT
"
${
subdir
}
"
MATCHES
"build"
))
add_subdirectory
(
${
subdir
}
)
ENDIF
()
ENDFOREACH
()
add_subdirectory
(
01_gemm
)
add_subdirectory
(
02_gemm_bilinear
)
add_subdirectory
(
03_gemm_bias_relu
)
add_subdirectory
(
04_gemm_add_add_fastgelu
)
add_subdirectory
(
09_convnd_fwd
)
add_subdirectory
(
10_convnd_fwd_multiple_d_multiple_reduce
)
add_subdirectory
(
12_reduce
)
add_subdirectory
(
13_pool2d_fwd
)
add_subdirectory
(
14_gemm_quantization
)
add_subdirectory
(
15_grouped_gemm
)
add_subdirectory
(
16_gemm_multi_d_multi_reduces
)
add_subdirectory
(
17_convnd_bwd_data
)
add_subdirectory
(
18_batched_gemm_reduce
)
add_subdirectory
(
19_binary_elementwise
)
add_subdirectory
(
20_grouped_conv_bwd_weight
)
add_subdirectory
(
21_gemm_layernorm
)
add_subdirectory
(
22_cgemm
)
add_subdirectory
(
23_softmax
)
add_subdirectory
(
24_batched_gemm
)
add_subdirectory
(
25_gemm_bias_e_permute
)
add_subdirectory
(
26_contraction
)
add_subdirectory
(
27_layernorm2d_fwd
)
add_subdirectory
(
28_grouped_gemm_bias_e_permute
)
add_subdirectory
(
29_batched_gemm_bias_e_permute
)
add_subdirectory
(
30_grouped_conv_fwd_multiple_d
)
add_subdirectory
(
31_batched_gemm_gemm
)
add_subdirectory
(
32_batched_gemm_scale_softmax_gemm
)
add_subdirectory
(
33_multiple_reduce
)
add_subdirectory
(
34_batchnorm
)
add_subdirectory
(
35_splitK_gemm
)
add_subdirectory
(
36_sparse_embedding
)
add_subdirectory
(
37_batched_gemm_add_add_relu_gemm_add
)
add_subdirectory
(
38_grouped_conv_bwd_data_multiple_d
)
add_subdirectory
(
39_permute
)
add_subdirectory
(
40_conv2d_fwd_quantization
)
add_subdirectory
(
41_grouped_conv_conv_fwd
)
add_subdirectory
(
42_groupnorm_fwd
)
add_subdirectory
(
43_splitk_gemm_bias_e_permute
)
add_subdirectory
(
44_elementwise_permute
)
add_subdirectory
(
45_elementwise_normalization
)
add_subdirectory
(
46_gemm_add_multiply
)
add_subdirectory
(
47_gemm_bias_softmax_gemm_permute
)
add_subdirectory
(
48_pool3d_fwd
)
add_subdirectory
(
49_maxpool2d_bwd
)
add_subdirectory
(
50_put_element
)
add_subdirectory
(
51_avgpool3d_bwd
)
add_subdirectory
(
52_im2col_col2im
)
add_subdirectory
(
53_layernorm_bwd
)
add_subdirectory
(
54_groupnorm_bwd
)
add_subdirectory
(
60_gemm_multi_ABD
)
add_subdirectory
(
61_contraction_multi_ABD
)
add_subdirectory
(
62_conv_fwd_activ
)
add_subdirectory
(
63_layernorm4d_fwd
)
add_subdirectory
(
64_tensor_transforms
)
\ No newline at end of file
cmake/EnableCompilerWarnings.cmake
View file @
c508a7c9
...
...
@@ -25,40 +25,18 @@
################################################################################
# - Enable warning all for gcc/clang or use /W4 for visual studio
## Strict warning level
if
(
MSVC
)
# Use the highest warning level for visual studio.
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/w"
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/w"
)
# set(CMAKE_CXX_WARNING_LEVEL 4)
# if (CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
# string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# else ()
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
# endif ()
## Strict compile options for Visual C++ compiler
set
(
__default_msvc_compile_options /w
)
# set(CMAKE_C_WARNING_LEVEL 4)
# if (CMAKE_C_FLAGS MATCHES "/W[0-4]")
# string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
# else ()
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
# endif ()
else
()
foreach
(
COMPILER C CXX
)
set
(
CMAKE_COMPILER_WARNINGS
)
# use -Wall for gcc and clang
list
(
APPEND CMAKE_COMPILER_WARNINGS
-Wall
-Wextra
## Strict compile options for GNU/Clang compilers
set
(
__default_compile_options
-Wall -Wextra
-Wcomment
-Wendif-labels
-Wformat
-Winit-self
-Wreturn-type
-Wsequence-point
# Shadow is broken on gcc when using lambdas
# -Wshadow
-Wswitch
-Wtrigraphs
-Wundef
...
...
@@ -72,9 +50,11 @@ else()
-Wno-extra-semi-stmt
-Wno-unused-template
)
if
(
CMAKE_
${
COMPILER
}
_COMPILER_ID MATCHES
"Clang"
)
list
(
APPEND CMAKE_COMPILER_WARNINGS
## Strict compile options for Clang compilers
set
(
__default_clang_compile_options
-Weverything
-Wshadow
-Wno-c++98-compat
-Wno-c++98-compat-pedantic
-Wno-conversion
...
...
@@ -94,21 +74,35 @@ else()
-Wno-unused-command-line-argument
-Wno-weak-vtables
-Wno-covered-switch-default
-Wno-unsafe-buffer-usage
)
else
()
if
(
CMAKE_
${
COMPILER
}
_COMPILER_ID MATCHES
"GNU"
AND
${
COMPILER
}
MATCHES
"CXX"
)
# cmake 3.5.2 does not support >=.
if
(
NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS
"6.1"
)
list
(
APPEND CMAKE_COMPILER_WARNINGS
-Wno-ignored-attributes
)
endif
()
endif
()
list
(
APPEND CMAKE_COMPILER_WARNINGS
-Wno-unsafe-buffer-usage
)
if
(
WIN32
)
list
(
APPEND __default_clang_compile_options
-fms-extensions
-fms-compatibility
-fdelayed-template-parsing
)
endif
()
set
(
__default_gnu_compile_options
-Wduplicated-branches
-Wduplicated-cond
-Wno-noexcept-type
-Wno-ignored-attributes
-Wodr
-Wshift-negative-value
-Wshift-overflow=2
-Wno-missing-field-initializers
-Wno-deprecated-declarations
)
endif
()
add_definitions
(
${
CMAKE_COMPILER_WARNINGS
}
)
endforeach
()
endif
()
-Wno-maybe-uninitialized
-Wno-deprecated-declarations
)
add_compile_options
(
"$<$<OR:$<CXX_COMPILER_ID:MSVC>,$<C_COMPILER_ID:MSVC>>:
${
__default_msvc_compile_options
}
>"
"$<$<OR:$<CXX_COMPILER_ID:GNU,Clang>,$<C_COMPILER_ID:GNU,Clang>>:
${
__default_compile_options
}
>"
"$<$<OR:$<AND:$<CXX_COMPILER_ID:GNU>,$<VERSION_GREATER_EQUAL:$<CXX_COMPILER_VERSION>,7>>,$<AND:$<C_COMPILER_ID:GNU>,$<VERSION_GREATER_EQUAL:$<C_COMPILER_VERSION>,7>>>:
${
__default_gnu_compile_options
}
>"
"$<$<OR:$<CXX_COMPILER_ID:Clang>,$<C_COMPILER_ID:Clang>>:
${
__default_clang_compile_options
}
>"
)
unset
(
__default_msvc_compile_options
)
unset
(
__default_compile_options
)
unset
(
__default_gnu_compile_options
)
unset
(
__default_clang_compile_options
)
cmake/getopt.cmake
0 → 100644
View file @
c508a7c9
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
# getopt::getopt is always declared as an imported INTERFACE target. It only
# receives link libraries and include directories on Windows, where the
# getopt project is fetched and built as part of this build.
add_library(getopt::getopt INTERFACE IMPORTED GLOBAL)

if(WIN32)
    include(FetchContent)

    # NOTE(review): GIT_TAG tracks the "main" branch rather than a fixed
    # commit, so the fetched sources can change between configures.
    FetchContent_Declare(
        getopt
        GIT_REPOSITORY https://github.com/apwojcik/getopt.git
        GIT_TAG main
        SYSTEM
    )

    # Remember the current BUILD_SHARED_LIBS value, then switch it OFF while
    # the fetched project is added to the build.
    set(__build_shared_libs ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")

    FetchContent_MakeAvailable(getopt)

    # Restore the old value of BUILD_SHARED_LIBS
    set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE)

    # Expose the fetched library and its src/ directory through the
    # interface target.
    FetchContent_GetProperties(getopt)
    target_link_libraries(getopt::getopt INTERFACE wingetopt)
    target_include_directories(getopt::getopt INTERFACE ${getopt_SOURCE_DIR}/src)
endif()
\ No newline at end of file
cmake/googletest.cmake
deleted
100644 → 0
View file @
bc4bf9bd
# Fetches GoogleTest with FetchContent, adds it to the build (excluded from
# the default "all" target), and applies warning suppressions and
# position-independent code to its four library targets.
include(FetchContent)

# Optional path to a local GoogleTest checkout; when set it is forwarded to
# FetchContent as the source-directory override.
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")

if(GOOGLETEST_DIR)
    set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()

message(STATUS "Fetching GoogleTest")

# Warning suppressions applied to the GoogleTest targets below.
list(APPEND GTEST_CMAKE_CXX_FLAGS
    -Wno-undef
    -Wno-reserved-identifier
    -Wno-global-constructors
    -Wno-missing-noreturn
    -Wno-disabled-macro-expansion
    -Wno-used-but-marked-unused
    -Wno-switch-enum
    -Wno-zero-as-null-pointer-constant
    -Wno-unused-member-function
    -Wno-comma
    -Wno-old-style-cast
    -Wno-deprecated
    -Wno-unsafe-buffer-usage
)
message(STATUS "Suppressing googletest warnings with flags: ${GTEST_CMAKE_CXX_FLAGS}")

FetchContent_Declare(
    googletest
    GIT_REPOSITORY https://github.com/google/googletest.git
    GIT_TAG b85864c64758dec007208e56af933fc3f52044ee
)

# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

# Populate manually so the subdirectory can be added with EXCLUDE_FROM_ALL.
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
    FetchContent_Populate(googletest)
    add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

# Same treatment for every GoogleTest/GoogleMock library target.
foreach(googletest_target IN ITEMS gtest gtest_main gmock gmock_main)
    target_compile_options(${googletest_target} PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
    set_target_properties(${googletest_target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
endforeach()
cmake/gtest.cmake
0 → 100644
View file @
c508a7c9
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
# Fetches GoogleTest with FetchContent and adds it to the build with
# BUILD_SHARED_LIBS temporarily switched OFF, then adds warning suppressions
# to the gtest and gtest_main targets.
include(FetchContent)

# Optional path to a local GoogleTest checkout; when set it is forwarded to
# FetchContent as the source-directory override.
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")

if(GOOGLETEST_DIR)
    set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()

# GoogleTest options: turn off GoogleMock and GoogleTest's install rules.
set(BUILD_GMOCK OFF CACHE INTERNAL "")
set(INSTALL_GTEST OFF CACHE INTERNAL "")

FetchContent_Declare(
    googletest
    GIT_REPOSITORY https://github.com/google/googletest.git
    GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571
    SYSTEM
)

if(WIN32)
    # "CACHE INTERNAL" must be two keywords. Written as the single token
    # CACHE_INTERNAL, set() creates a normal list variable instead of the
    # cache entry used for the other GoogleTest options above.
    set(gtest_force_shared_crt ON CACHE INTERNAL "")
endif()

# Store the current value of BUILD_SHARED_LIBS
set(__build_shared_libs ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")

FetchContent_MakeAvailable(googletest)

# Restore the old value of BUILD_SHARED_LIBS
set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE)

# Extra warning suppressions for Windows builds.
if(WIN32)
    list(APPEND GTEST_CMAKE_CXX_FLAGS
        -Wno-suggest-destructor-override
        -Wno-suggest-override
        -Wno-nonportable-system-include-path
        -Wno-language-extension-token
    )
endif()

# Pass the collected suppressions to the targets; previously the list was
# built but never used. GTEST_CMAKE_CXX_FLAGS expands to nothing when unset.
target_compile_options(gtest PRIVATE -Wno-undef ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE -Wno-undef ${GTEST_CMAKE_CXX_FLAGS})
example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp
View file @
c508a7c9
...
...
@@ -79,7 +79,7 @@ std::ostream& show_2d_matrix(std::ostream& os, Tensor<DataType>& matrix)
}
int
main
(
int
argc
,
char
*
argv
[])
{
bool
do_verification
=
0
;
bool
do_verification
=
false
;
int
init_method
=
0
;
bool
time_kernel
=
false
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment