Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
4031a392
"host/online_compilation/hip_utility/hipoc_kernel.cpp" did not exist on "1685048a6725e531b577510295d2d62664c15962"
Commit
4031a392
authored
Feb 11, 2025
by
Jakub Piasecki
Browse files
add ck tile examples to package
parent
2312f4aa
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
66 additions
and
12 deletions
+66
-12
CMakeLists.txt
CMakeLists.txt
+7
-9
Jenkinsfile
Jenkinsfile
+2
-2
example/ck_tile/01_fmha/CMakeLists.txt
example/ck_tile/01_fmha/CMakeLists.txt
+6
-0
example/ck_tile/02_layernorm2d/CMakeLists.txt
example/ck_tile/02_layernorm2d/CMakeLists.txt
+3
-0
example/ck_tile/03_gemm/CMakeLists.txt
example/ck_tile/03_gemm/CMakeLists.txt
+5
-0
example/ck_tile/04_img2col/CMakeLists.txt
example/ck_tile/04_img2col/CMakeLists.txt
+3
-0
example/ck_tile/05_reduce/CMakeLists.txt
example/ck_tile/05_reduce/CMakeLists.txt
+3
-0
example/ck_tile/06_permute/CMakeLists.txt
example/ck_tile/06_permute/CMakeLists.txt
+3
-0
example/ck_tile/09_topk_softmax/CMakeLists.txt
example/ck_tile/09_topk_softmax/CMakeLists.txt
+3
-0
example/ck_tile/10_rmsnorm2d/CMakeLists.txt
example/ck_tile/10_rmsnorm2d/CMakeLists.txt
+6
-0
example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt
example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt
+6
-0
example/ck_tile/12_smoothquant/CMakeLists.txt
example/ck_tile/12_smoothquant/CMakeLists.txt
+3
-0
example/ck_tile/13_moe_sorting/CMakeLists.txt
example/ck_tile/13_moe_sorting/CMakeLists.txt
+3
-0
example/ck_tile/14_moe_smoothquant/CMakeLists.txt
example/ck_tile/14_moe_smoothquant/CMakeLists.txt
+3
-0
example/ck_tile/15_fused_moe/CMakeLists.txt
example/ck_tile/15_fused_moe/CMakeLists.txt
+3
-0
example/ck_tile/16_batched_gemm/CMakeLists.txt
example/ck_tile/16_batched_gemm/CMakeLists.txt
+3
-0
example/ck_tile/17_grouped_gemm/CMakeLists.txt
example/ck_tile/17_grouped_gemm/CMakeLists.txt
+2
-0
example/ck_tile/35_batched_transpose/CMakeLists.txt
example/ck_tile/35_batched_transpose/CMakeLists.txt
+2
-1
No files found.
CMakeLists.txt
View file @
4031a392
...
...
@@ -196,20 +196,17 @@ if (SUPPORTED_GPU_TARGETS MATCHES "gfx9")
add_definitions
(
-DCK_USE_XDL
)
set
(
CK_USE_XDL
"ON"
)
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx94"
OR SUPPORTED_GPU_TARGETS MATCHES
"gfx95"
)
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx94"
)
message
(
"Enabling FP8 gemms on native architectures"
)
add_definitions
(
-DCK_USE_GFX94
)
set
(
CK_USE_GFX94
"ON"
)
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx95"
)
add_definitions
(
-DCK_USE_AMD_MFMA_GFX950
)
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx11"
OR SUPPORTED_GPU_TARGETS MATCHES
"gfx12"
)
message
(
"Enabling WMMA instances"
)
add_definitions
(
-DCK_USE_WMMA
)
set
(
CK_USE_WMMA
"ON"
)
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx12"
OR SUPPORTED_GPU_TARGETS MATCHES
"gfx950"
)
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx12"
)
add_definitions
(
-DCK_USE_OCP_FP8
)
set
(
CK_USE_OCP_FP8
"ON"
)
endif
()
...
...
@@ -217,10 +214,6 @@ if (SUPPORTED_GPU_TARGETS MATCHES "gfx90a" OR SUPPORTED_GPU_TARGETS MATCHES "gfx
add_definitions
(
-DCK_USE_FNUZ_FP8
)
set
(
CK_USE_FNUZ_FP8
"ON"
)
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx950"
)
add_definitions
(
-DCK_USE_NATIVE_MX_SUPPORT
)
set
(
CK_USE_NATIVE_MX_SUPPORT
"ON"
)
endif
()
option
(
CK_USE_FP8_ON_UNSUPPORTED_ARCH
"Enable FP8 GEMM instances on older architectures"
OFF
)
if
(
CK_USE_FP8_ON_UNSUPPORTED_ARCH
AND
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx90a"
OR SUPPORTED_GPU_TARGETS MATCHES
"gfx908"
))
...
...
@@ -607,6 +600,11 @@ if(NOT GPU_ARCHS AND USER_GPU_TARGETS)
LIBRARY_NAME composablekernel
PACKAGE_NAME examples
)
rocm_package_setup_component
(
examples_ck_tile
LIBRARY_NAME composablekernel
PACKAGE_NAME examples_ck_tile
)
add_subdirectory
(
example
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
...
...
Jenkinsfile
View file @
4031a392
...
...
@@ -189,7 +189,7 @@ def cmake_build(Map conf=[:]){
def
package_build
=
(
conf
.
get
(
"package_build"
,
""
)
==
"true"
)
if
(
package_build
==
true
)
{
config_targets
=
"package"
config_targets
=
"
examples_ck_tile
package"
}
if
(
conf
.
get
(
"build_install"
,
""
)
==
"true"
)
...
...
@@ -523,7 +523,7 @@ def Build_CK(Map conf=[:]){
if
(
params
.
RUN_FULL_QA
&&
arch_type
==
1
){
// build deb packages for all gfx9 targets on gfx90a system and prepare to export
echo
"Build ckProfiler package"
sh
'make -j package'
sh
'make -j
examples_ck_tile
package'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
sh
'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
stash
includes:
"ckprofiler_0.2.0_amd64.deb"
,
name:
"ckprofiler_0.2.0_amd64.deb"
...
...
example/ck_tile/01_fmha/CMakeLists.txt
View file @
4031a392
...
...
@@ -62,6 +62,9 @@ add_executable(${EXAMPLE_FMHA_FWD} EXCLUDE_FROM_ALL fmha_fwd.cpp)
target_include_directories
(
${
EXAMPLE_FMHA_FWD
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
target_sources
(
${
EXAMPLE_FMHA_FWD
}
PRIVATE
${
FMHA_FWD_GEN_BLOBS
}
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_FMHA_FWD
}
)
rocm_install
(
TARGETS
${
EXAMPLE_FMHA_FWD
}
COMPONENT examples_ck_tile
)
set
(
EXAMPLE_FMHA_BWD
"tile_example_fmha_bwd"
)
# not using add_example_executable() to add this target, since we don't want this to have
# to be included in "make all/install/check"
...
...
@@ -70,6 +73,9 @@ add_executable(${EXAMPLE_FMHA_BWD} EXCLUDE_FROM_ALL fmha_bwd.cpp)
target_include_directories
(
${
EXAMPLE_FMHA_BWD
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
target_sources
(
${
EXAMPLE_FMHA_BWD
}
PRIVATE
${
FMHA_BWD_GEN_BLOBS
}
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_FMHA_BWD
}
)
rocm_install
(
TARGETS
${
EXAMPLE_FMHA_BWD
}
COMPONENT examples_ck_tile
)
# NOTE: this is dangerous since will change the whole kernel to flush denormals
# WIP with compiler team for an exp2 intrinsic..., then remove this
if
(
NOT DEFINED FMHA_FWD_FAST_EXP2
)
...
...
example/ck_tile/02_layernorm2d/CMakeLists.txt
View file @
4031a392
...
...
@@ -37,6 +37,9 @@ list(APPEND EXAMPLE_LAYERNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template
target_compile_options
(
${
EXAMPLE_LAYERNORM2D_FWD
}
PRIVATE
${
EXAMPLE_LAYERNORM2D_FWD_COMPILE_OPTIONS
}
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_LAYERNORM2D_FWD
}
)
rocm_install
(
TARGETS
${
EXAMPLE_LAYERNORM2D_FWD
}
COMPONENT examples_ck_tile
)
# TODO: we have to turn off this global prop, otherwise the progress bar generated
# by cmake will print too many files, execvp: /bin/sh: Argument list too long
# however, this property may affect global
...
...
example/ck_tile/03_gemm/CMakeLists.txt
View file @
4031a392
add_executable
(
tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp
)
add_executable
(
tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp
)
add_dependencies
(
examples_ck_tile tile_example_gemm_basic
)
rocm_install
(
TARGETS tile_example_gemm_basic COMPONENT examples_ck_tile
)
add_dependencies
(
examples_ck_tile tile_example_gemm_universal
)
rocm_install
(
TARGETS tile_example_gemm_universal COMPONENT examples_ck_tile
)
\ No newline at end of file
example/ck_tile/04_img2col/CMakeLists.txt
View file @
4031a392
# not using add_example_executable() to add this target, since we don't want this to have
# to be included in "make all/install/check"
add_executable
(
tile_example_img2col EXCLUDE_FROM_ALL image_to_column.cpp
)
add_dependencies
(
examples_ck_tile tile_example_img2col
)
rocm_install
(
TARGETS tile_example_img2col COMPONENT examples_ck_tile
)
\ No newline at end of file
example/ck_tile/05_reduce/CMakeLists.txt
View file @
4031a392
...
...
@@ -7,6 +7,9 @@ add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL reduce.cpp)
target_include_directories
(
${
EXAMPLE_REDUCE
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
set
(
EXAMPLE_REDUCE_COMPILE_OPTIONS
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_REDUCE
}
)
rocm_install
(
TARGETS
${
EXAMPLE_REDUCE
}
COMPONENT examples_ck_tile
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
list
(
APPEND EXAMPLE_REDUCE_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal
)
...
...
example/ck_tile/06_permute/CMakeLists.txt
View file @
4031a392
...
...
@@ -2,6 +2,9 @@
# to be included in "make all/install/check"
add_executable
(
tile_example_permute EXCLUDE_FROM_ALL permute.cpp
)
add_dependencies
(
examples_ck_tile tile_example_permute
)
rocm_install
(
TARGETS tile_example_permute COMPONENT examples_ck_tile
)
if
(
NOT DEFINED PERMUTE_USE_ALTERNATIVE_IMPL
)
# set(PERMUTE_USE_ALTERNATIVE_IMPL false)
set
(
PERMUTE_USE_ALTERNATIVE_IMPL true
)
...
...
example/ck_tile/09_topk_softmax/CMakeLists.txt
View file @
4031a392
add_executable
(
tile_example_topk_softmax EXCLUDE_FROM_ALL topk_softmax.cpp topk_softmax_api.cpp
)
target_include_directories
(
tile_example_topk_softmax PRIVATE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
)
add_dependencies
(
examples_ck_tile tile_example_topk_softmax
)
rocm_install
(
TARGETS tile_example_topk_softmax COMPONENT examples_ck_tile
)
set
(
EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
list
(
APPEND EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal
)
...
...
example/ck_tile/10_rmsnorm2d/CMakeLists.txt
View file @
4031a392
...
...
@@ -32,6 +32,9 @@ target_sources(${TILE_RMSNORM2D_FWD} PRIVATE ${RMSNORM2D_FWD_GEN_BLOBS})
set
(
TILE_RMSNORM2D_FWD_COMPILE_OPTIONS
)
add_dependencies
(
examples_ck_tile
${
TILE_RMSNORM2D_FWD
}
)
rocm_install
(
TARGETS
${
TILE_RMSNORM2D_FWD
}
COMPONENT examples_ck_tile
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
list
(
APPEND TILE_RMSNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal --offload-compress
)
...
...
@@ -41,6 +44,9 @@ set(EXAMPLE_RMSNORM2D_FWD "tile_example_rmsnorm2d_fwd")
add_executable
(
${
EXAMPLE_RMSNORM2D_FWD
}
EXCLUDE_FROM_ALL example_rmsnorm2d_fwd.cpp
)
target_compile_options
(
${
EXAMPLE_RMSNORM2D_FWD
}
PRIVATE
${
TILE_RMSNORM2D_FWD_COMPILE_OPTIONS
}
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_RMSNORM2D_FWD
}
)
rocm_install
(
TARGETS
${
EXAMPLE_RMSNORM2D_FWD
}
COMPONENT examples_ck_tile
)
# TODO: we have to turn off this global prop, otherwise the progress bar generated
# by cmake will print too many files, execvp: /bin/sh: Argument list too long
# however, this property may affect global
...
...
example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt
View file @
4031a392
...
...
@@ -7,6 +7,9 @@ add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL add_rmsnorm2d_
target_include_directories
(
${
TILE_ADD_RMSNORM2D_RDQUANT_FWD
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
target_sources
(
${
TILE_ADD_RMSNORM2D_RDQUANT_FWD
}
PRIVATE
${
INSTANCE_SRCS
}
)
add_dependencies
(
examples_ck_tile
${
TILE_ADD_RMSNORM2D_RDQUANT_FWD
}
)
rocm_install
(
TARGETS
${
TILE_ADD_RMSNORM2D_RDQUANT_FWD
}
COMPONENT examples_ck_tile
)
set
(
TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
...
...
@@ -18,6 +21,9 @@ set(EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD "tile_example_add_rmsnorm2d_rdquant_fwd")
add_executable
(
${
EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD
}
EXCLUDE_FROM_ALL example_add_rmsnorm2d_rdquant_fwd.cpp
)
target_compile_options
(
${
EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD
}
PRIVATE
${
TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS
}
)
add_dependencies
(
examples_ck_tile
${
EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD
}
)
rocm_install
(
TARGETS
${
EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD
}
COMPONENT examples_ck_tile
)
# TODO: we have to turn off this global prop, otherwise the progress bar generated
# by cmake will print too many files, execvp: /bin/sh: Argument list too long
# however, this property may affect global
...
...
example/ck_tile/12_smoothquant/CMakeLists.txt
View file @
4031a392
...
...
@@ -5,6 +5,9 @@ function (add_smoothquant_example TARGET_NAME MAIN_SRC)
add_executable
(
${
TARGET_NAME
}
EXCLUDE_FROM_ALL
${
MAIN_SRC
}
)
target_include_directories
(
${
TARGET_NAME
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
add_dependencies
(
examples_ck_tile
${
TARGET_NAME
}
)
rocm_install
(
TARGETS
${
TARGET_NAME
}
COMPONENT examples_ck_tile
)
foreach
(
source IN LISTS ARGN
)
list
(
APPEND INSTANCE_SRCS
${
source
}
)
endforeach
()
...
...
example/ck_tile/13_moe_sorting/CMakeLists.txt
View file @
4031a392
add_executable
(
tile_example_moe_sorting EXCLUDE_FROM_ALL moe_sorting.cpp moe_sorting_api.cpp
)
target_include_directories
(
tile_example_moe_sorting PRIVATE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
)
add_dependencies
(
examples_ck_tile tile_example_moe_sorting
)
rocm_install
(
TARGETS tile_example_moe_sorting COMPONENT examples_ck_tile
)
set
(
EXAMPLE_MOE_SORTING_COMPILE_OPTIONS
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
list
(
APPEND EXAMPLE_MOE_SORTING_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal
)
...
...
example/ck_tile/14_moe_smoothquant/CMakeLists.txt
View file @
4031a392
...
...
@@ -5,6 +5,9 @@ function (add_moe_smoothquant_example TARGET_NAME MAIN_SRC)
add_executable
(
${
TARGET_NAME
}
EXCLUDE_FROM_ALL
${
MAIN_SRC
}
)
target_include_directories
(
${
TARGET_NAME
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
add_dependencies
(
examples_ck_tile
${
TARGET_NAME
}
)
rocm_install
(
TARGETS
${
TARGET_NAME
}
COMPONENT examples_ck_tile
)
foreach
(
source IN LISTS ARGN
)
list
(
APPEND INSTANCE_SRCS
${
source
}
)
endforeach
()
...
...
example/ck_tile/15_fused_moe/CMakeLists.txt
View file @
4031a392
...
...
@@ -7,6 +7,9 @@ add_executable(${TILE_EXAPMLE_FUSED_MOE} EXCLUDE_FROM_ALL main.cpp)
target_include_directories
(
${
TILE_EXAPMLE_FUSED_MOE
}
PRIVATE
${
CMAKE_CURRENT_LIST_DIR
}
)
target_sources
(
${
TILE_EXAPMLE_FUSED_MOE
}
PRIVATE
${
INSTANCE_SRCS
}
)
add_dependencies
(
examples_ck_tile
${
TILE_EXAPMLE_FUSED_MOE
}
)
rocm_install
(
TARGETS
${
TILE_EXAPMLE_FUSED_MOE
}
COMPONENT examples_ck_tile
)
set
(
TILE_EXAPMLE_FUSED_MOE_COMPILE_OPTIONS
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
...
...
example/ck_tile/16_batched_gemm/CMakeLists.txt
View file @
4031a392
add_executable
(
tile_example_batched_gemm EXCLUDE_FROM_ALL batched_gemm.cpp
)
add_dependencies
(
examples_ck_tile tile_example_batched_gemm
)
rocm_install
(
TARGETS tile_example_batched_gemm COMPONENT examples_ck_tile
)
\ No newline at end of file
example/ck_tile/17_grouped_gemm/CMakeLists.txt
View file @
4031a392
add_executable
(
tile_example_grouped_gemm EXCLUDE_FROM_ALL grouped_gemm.cpp
)
add_dependencies
(
examples_ck_tile tile_example_grouped_gemm
)
rocm_install
(
TARGETS tile_example_grouped_gemm COMPONENT examples_ck_tile
)
\ No newline at end of file
example/ck_tile/35_batched_transpose/CMakeLists.txt
View file @
4031a392
...
...
@@ -2,8 +2,9 @@ set(TARGET_NAME tile_example_batched_transpose)
add_executable
(
${
TARGET_NAME
}
EXCLUDE_FROM_ALL batched_transpose_example.cpp batched_transpose_api.cpp
)
target_include_directories
(
${
TARGET_NAME
}
PRIVATE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
)
add_dependencies
(
examples_ck_tile
${
TARGET_NAME
}
)
rocm_install
(
TARGETS
${
TARGET_NAME
}
COMPONENT examples_ck_tile
)
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
list
(
APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal
)
# list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -v --save-temps -Wno-gnu-line-marker)
target_compile_options
(
tile_example_batched_transpose PRIVATE
${
EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS
}
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment