Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
72c9f129
Commit
72c9f129
authored
Sep 20, 2024
by
Jun Liu
Browse files
Merge branch 'amd-develop' into amd-master
parents
241c261f
ded0d83d
Changes
235
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1142 additions
and
73 deletions
+1142
-73
CMakeLists.txt
CMakeLists.txt
+40
-13
Jenkinsfile
Jenkinsfile
+74
-17
client_example/07_grouped_convnd_fwd/CMakeLists.txt
client_example/07_grouped_convnd_fwd/CMakeLists.txt
+3
-3
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
+4
-2
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
+5
-2
client_example/16_convnd_fwd/CMakeLists.txt
client_example/16_convnd_fwd/CMakeLists.txt
+1
-1
client_example/20_splitk_gemm/CMakeLists.txt
client_example/20_splitk_gemm/CMakeLists.txt
+1
-1
client_example/24_grouped_conv_activation/CMakeLists.txt
client_example/24_grouped_conv_activation/CMakeLists.txt
+20
-4
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp
...activation/grouped_convnd_fwd_convscale_reduce/common.hpp
+834
-0
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_amax_fp8.cpp
...nd_fwd_convscale_reduce/conv3d_fwd_convscale_amax_fp8.cpp
+58
-0
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_relu_amax_fp8.cpp
...d_convscale_reduce/conv3d_fwd_convscale_relu_amax_fp8.cpp
+58
-0
client_example/CMakeLists.txt
client_example/CMakeLists.txt
+11
-2
codegen/CMakeLists.txt
codegen/CMakeLists.txt
+2
-2
codegen/test/CMakeLists.txt
codegen/test/CMakeLists.txt
+19
-12
codegen/test/rtc/CMakeLists.txt
codegen/test/rtc/CMakeLists.txt
+0
-2
codegen/test/rtc/include/rtc/compile_kernel.hpp
codegen/test/rtc/include/rtc/compile_kernel.hpp
+2
-2
codegen/test/rtc/include/rtc/tmp_dir.hpp
codegen/test/rtc/include/rtc/tmp_dir.hpp
+2
-2
codegen/test/rtc/src/compile_kernel.cpp
codegen/test/rtc/src/compile_kernel.cpp
+4
-4
codegen/test/rtc/src/tmp_dir.cpp
codegen/test/rtc/src/tmp_dir.cpp
+3
-3
docs/sphinx/requirements.in
docs/sphinx/requirements.in
+1
-1
No files found.
CMakeLists.txt
View file @
72c9f129
...
@@ -26,7 +26,23 @@ set(version 1.1.0)
...
@@ -26,7 +26,23 @@ set(version 1.1.0)
project
(
composable_kernel VERSION
${
version
}
LANGUAGES CXX HIP
)
project
(
composable_kernel VERSION
${
version
}
LANGUAGES CXX HIP
)
include
(
CTest
)
include
(
CTest
)
find_package
(
Python3 3.6 COMPONENTS Interpreter REQUIRED
)
# Usage: for customized Python location cmake -DCK_USE_ALTERNATIVE_PYTHON="/opt/Python-3.8.13/bin/python3.8"
# CK Codegen requires dataclass which is added in Python 3.7
# Python version 3.8 is required for general good practice as it is default for Ubuntu 20.04
if
(
NOT CK_USE_ALTERNATIVE_PYTHON
)
find_package
(
Python3 3.8 COMPONENTS Interpreter REQUIRED
)
else
()
message
(
"Using alternative python version"
)
set
(
EXTRA_PYTHON_PATH
)
# this is overly restrictive, we may need to be more flexible on the following
string
(
REPLACE
"/bin/python3.8"
""
EXTRA_PYTHON_PATH
"
${
CK_USE_ALTERNATIVE_PYTHON
}
"
)
message
(
"alternative python path is:
${
EXTRA_PYTHON_PATH
}
"
)
find_package
(
Python3 3.6 COMPONENTS Interpreter REQUIRED
)
add_definitions
(
-DPython3_EXECUTABLE=
"
${
CK_USE_ALTERNATIVE_PYTHON
}
"
)
set
(
Python3_EXECUTABLE
"
${
CK_USE_ALTERNATIVE_PYTHON
}
"
)
set
(
PYTHON_EXECUTABLE
"
${
CK_USE_ALTERNATIVE_PYTHON
}
"
)
set
(
ENV{LD_LIBRARY_PATH}
"
${
EXTRA_PYTHON_PATH
}
/lib:$ENV{LD_LIBRARY_PATH}"
)
endif
()
list
(
APPEND CMAKE_MODULE_PATH
"
${
PROJECT_SOURCE_DIR
}
/cmake"
)
list
(
APPEND CMAKE_MODULE_PATH
"
${
PROJECT_SOURCE_DIR
}
/cmake"
)
...
@@ -62,8 +78,14 @@ if (DTYPES)
...
@@ -62,8 +78,14 @@ if (DTYPES)
endif
()
endif
()
message
(
"DTYPES macro set to
${
DTYPES
}
"
)
message
(
"DTYPES macro set to
${
DTYPES
}
"
)
else
()
else
()
add_definitions
(
-DCK_ENABLE_INT8 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16
)
add_definitions
(
-DCK_ENABLE_INT8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8
)
set
(
CK_ENABLE_ALL_DTYPES
"ON"
)
set
(
CK_ENABLE_INT8
"ON"
)
set
(
CK_ENABLE_FP16
"ON"
)
set
(
CK_ENABLE_FP32
"ON"
)
set
(
CK_ENABLE_FP64
"ON"
)
set
(
CK_ENABLE_BF16
"ON"
)
set
(
CK_ENABLE_FP8
"ON"
)
set
(
CK_ENABLE_BF8
"ON"
)
endif
()
endif
()
#for f8/bf8_t type
#for f8/bf8_t type
...
@@ -182,12 +204,18 @@ endif()
...
@@ -182,12 +204,18 @@ endif()
configure_file
(
include/ck/config.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/config.h
)
configure_file
(
include/ck/config.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/config.h
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
GREATER 500723302
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
GREATER 500723302
)
message
(
"Adding the fno-offload-uniform-block compiler flag"
)
check_cxx_compiler_flag
(
"-fno-offload-uniform-block"
HAS_NO_OFFLOAD_UNIFORM_BLOCK
)
add_compile_options
(
-fno-offload-uniform-block
)
if
(
HAS_NO_OFFLOAD_UNIFORM_BLOCK
)
message
(
"Adding the fno-offload-uniform-block compiler flag"
)
add_compile_options
(
-fno-offload-uniform-block
)
endif
()
endif
()
endif
()
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
GREATER 600140090
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
GREATER 600140090
)
message
(
"Adding the enable-post-misched=0 compiler flag"
)
check_cxx_compiler_flag
(
"-mllvm -enable-post-misched=0"
HAS_ENABLE_POST_MISCHED
)
add_compile_options
(
"SHELL: -mllvm -enable-post-misched=0"
)
if
(
HAS_ENABLE_POST_MISCHED
)
message
(
"Adding the enable-post-misched=0 compiler flag"
)
add_compile_options
(
"SHELL: -mllvm -enable-post-misched=0"
)
endif
()
endif
()
endif
()
set
(
check-coerce
)
set
(
check-coerce
)
check_cxx_compiler_flag
(
" -mllvm -amdgpu-coerce-illegal-types=1"
check-coerce
)
check_cxx_compiler_flag
(
" -mllvm -amdgpu-coerce-illegal-types=1"
check-coerce
)
...
@@ -541,12 +569,7 @@ if(NOT DEFINED INSTANCES_ONLY)
...
@@ -541,12 +569,7 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples
PACKAGE_NAME examples
)
)
add_subdirectory
(
example
)
add_subdirectory
(
example
)
if
(
GPU_TARGETS MATCHES
"gfx9"
AND NOT INSTANCES_ONLY
)
add_subdirectory
(
test
)
add_subdirectory
(
codegen
)
endif
()
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
rocm_package_setup_component
(
profiler
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
LIBRARY_NAME composablekernel
...
@@ -563,6 +586,10 @@ if(NOT DEFINED INSTANCES_ONLY)
...
@@ -563,6 +586,10 @@ if(NOT DEFINED INSTANCES_ONLY)
endif
()
endif
()
endif
()
endif
()
if
(
NOT DEFINED PROFILER_ONLY
AND
(
GPU_TARGETS MATCHES
"gfx9"
OR DEFINED INSTANCES_ONLY
))
add_subdirectory
(
codegen
)
endif
()
#Create an interface target for the include only files and call it "composablekernels"
#Create an interface target for the include only files and call it "composablekernels"
include
(
CMakePackageConfigHelpers
)
include
(
CMakePackageConfigHelpers
)
...
...
Jenkinsfile
View file @
72c9f129
...
@@ -262,10 +262,19 @@ def cmake_build(Map conf=[:]){
...
@@ -262,10 +262,19 @@ def cmake_build(Map conf=[:]){
// reduce parallelism when compiling, clang uses too much memory
// reduce parallelism when compiling, clang uses too much memory
def
nt
=
nthreads
()
def
nt
=
nthreads
()
def
cmd
def
cmd
def
setup_cmd
def
build_cmd
def
execute_cmd
=
conf
.
get
(
"execute_cmd"
,
""
)
def
execute_cmd
=
conf
.
get
(
"execute_cmd"
,
""
)
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
def
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
if
(
setup_args
.
contains
(
"gfx90a"
)
&&
params
.
NINJA_BUILD_TRACE
){
def
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
echo
"running ninja build trace"
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake -G Ninja ${setup_args} .. "
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} ninja -j${nt} ${config_targets}"
)
}
else
{
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
}
cmd
=
conf
.
get
(
"cmd"
,
"""
cmd
=
conf
.
get
(
"cmd"
,
"""
${setup_cmd}
${setup_cmd}
${build_cmd}
${build_cmd}
...
@@ -281,7 +290,19 @@ def cmake_build(Map conf=[:]){
...
@@ -281,7 +290,19 @@ def cmake_build(Map conf=[:]){
echo
cmd
echo
cmd
dir
(
"build"
){
dir
(
"build"
){
//build CK
sh
cmd
sh
cmd
//run tests
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
if
(
setup_args
.
contains
(
"gfx90a"
)
&&
params
.
NINJA_BUILD_TRACE
){
sh
"/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json"
archiveArtifacts
"ck_build_trace.json"
sh
"ninja test"
}
else
{
sh
"make check"
}
}
}
}
// Only archive from master or develop
// Only archive from master or develop
...
@@ -426,8 +447,9 @@ def runCKProfiler(Map conf=[:]){
...
@@ -426,8 +447,9 @@ def runCKProfiler(Map conf=[:]){
archiveArtifacts
"perf_resnet50_N4.log"
archiveArtifacts
"perf_resnet50_N4.log"
archiveArtifacts
"perf_batched_gemm.log"
archiveArtifacts
"perf_batched_gemm.log"
archiveArtifacts
"perf_grouped_gemm.log"
archiveArtifacts
"perf_grouped_gemm.log"
archiveArtifacts
"perf_conv_fwd.log"
archiveArtifacts
"perf_grouped_conv_fwd.log"
archiveArtifacts
"perf_conv_bwd_data.log"
archiveArtifacts
"perf_grouped_conv_bwd_data.log"
archiveArtifacts
"perf_grouped_conv_bwd_weight.log"
archiveArtifacts
"perf_gemm_bilinear.log"
archiveArtifacts
"perf_gemm_bilinear.log"
archiveArtifacts
"perf_reduction.log"
archiveArtifacts
"perf_reduction.log"
archiveArtifacts
"perf_splitK_gemm.log"
archiveArtifacts
"perf_splitK_gemm.log"
...
@@ -439,8 +461,9 @@ def runCKProfiler(Map conf=[:]){
...
@@ -439,8 +461,9 @@ def runCKProfiler(Map conf=[:]){
stash
name:
"perf_resnet50_N4.log"
stash
name:
"perf_resnet50_N4.log"
stash
name:
"perf_batched_gemm.log"
stash
name:
"perf_batched_gemm.log"
stash
name:
"perf_grouped_gemm.log"
stash
name:
"perf_grouped_gemm.log"
stash
name:
"perf_conv_fwd.log"
stash
name:
"perf_grouped_conv_fwd.log"
stash
name:
"perf_conv_bwd_data.log"
stash
name:
"perf_grouped_conv_bwd_data.log"
stash
name:
"perf_grouped_conv_bwd_weight.log"
stash
name:
"perf_gemm_bilinear.log"
stash
name:
"perf_gemm_bilinear.log"
stash
name:
"perf_reduction.log"
stash
name:
"perf_reduction.log"
stash
name:
"perf_splitK_gemm.log"
stash
name:
"perf_splitK_gemm.log"
...
@@ -541,7 +564,7 @@ def Build_CK(Map conf=[:]){
...
@@ -541,7 +564,7 @@ def Build_CK(Map conf=[:]){
cmake_build
(
conf
)
cmake_build
(
conf
)
dir
(
"build"
){
dir
(
"build"
){
//run tests and examples
//run tests and examples
sh
'make -j check'
//
sh 'make -j check'
if
(
params
.
RUN_PERFORMANCE_TESTS
&&
do_perf_tests
==
0
){
if
(
params
.
RUN_PERFORMANCE_TESTS
&&
do_perf_tests
==
0
){
//we only need the ckProfiler to run the performance tests, so we pack and stash it
//we only need the ckProfiler to run the performance tests, so we pack and stash it
//do not stash profiler on nodes where we don't need to run performance tests
//do not stash profiler on nodes where we don't need to run performance tests
...
@@ -648,8 +671,9 @@ def process_results(Map conf=[:]){
...
@@ -648,8 +671,9 @@ def process_results(Map conf=[:]){
unstash
"perf_resnet50_N4.log"
unstash
"perf_resnet50_N4.log"
unstash
"perf_batched_gemm.log"
unstash
"perf_batched_gemm.log"
unstash
"perf_grouped_gemm.log"
unstash
"perf_grouped_gemm.log"
unstash
"perf_conv_fwd.log"
unstash
"perf_grouped_conv_fwd.log"
unstash
"perf_conv_bwd_data.log"
unstash
"perf_grouped_conv_bwd_data.log"
unstash
"perf_grouped_conv_bwd_weight.log"
unstash
"perf_gemm_bilinear.log"
unstash
"perf_gemm_bilinear.log"
unstash
"perf_reduction.log"
unstash
"perf_reduction.log"
unstash
"perf_splitK_gemm.log"
unstash
"perf_splitK_gemm.log"
...
@@ -681,8 +705,8 @@ def process_results(Map conf=[:]){
...
@@ -681,8 +705,8 @@ def process_results(Map conf=[:]){
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
CRON_SETTINGS
=
BRANCH_NAME
==
"develop"
?
'''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.2; RUN_CK_TILE_TESTS=true
CRON_SETTINGS
=
BRANCH_NAME
==
"develop"
?
'''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.2; RUN_CK_TILE_TESTS=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
;NINJA_BUILD_TRACE=true
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
;NINJA_BUILD_TRACE=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false'''
:
""
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false'''
:
""
pipeline
{
pipeline
{
...
@@ -746,6 +770,10 @@ pipeline {
...
@@ -746,6 +770,10 @@ pipeline {
name:
"RUN_PERFORMANCE_TESTS"
,
name:
"RUN_PERFORMANCE_TESTS"
,
defaultValue:
true
,
defaultValue:
true
,
description:
"Run the performance tests (default: ON)"
)
description:
"Run the performance tests (default: ON)"
)
booleanParam
(
name:
"RUN_GROUPED_CONV_LARGE_CASES_TESTS"
,
defaultValue:
false
,
description:
"Run the grouped conv large cases tests (default: OFF)"
)
booleanParam
(
booleanParam
(
name:
"RUN_CK_TILE_TESTS"
,
name:
"RUN_CK_TILE_TESTS"
,
defaultValue:
false
,
defaultValue:
false
,
...
@@ -758,7 +786,10 @@ pipeline {
...
@@ -758,7 +786,10 @@ pipeline {
name:
"BUILD_GFX12"
,
name:
"BUILD_GFX12"
,
defaultValue:
false
,
defaultValue:
false
,
description:
"Build CK and run tests on gfx12 (default: OFF)"
)
description:
"Build CK and run tests on gfx12 (default: OFF)"
)
booleanParam
(
name:
"NINJA_BUILD_TRACE"
,
defaultValue:
false
,
description:
"Generate a ninja build trace (default: OFF)"
)
}
}
environment
{
environment
{
dbuser
=
"${dbuser}"
dbuser
=
"${dbuser}"
...
@@ -792,6 +823,7 @@ pipeline {
...
@@ -792,6 +823,7 @@ pipeline {
}
}
agent
{
label
rocmnode
(
"nogpu"
)
}
agent
{
label
rocmnode
(
"nogpu"
)
}
environment
{
environment
{
setup_args
=
"NO_CK_BUILD"
execute_cmd
=
"find .. -not -path \'*.git*\' -iname \'*.h\' \
execute_cmd
=
"find .. -not -path \'*.git*\' -iname \'*.h\' \
-o -not -path \'*.git*\' -iname \'*.hpp\' \
-o -not -path \'*.git*\' -iname \'*.hpp\' \
-o -not -path \'*.git*\' -iname \'*.cpp\' \
-o -not -path \'*.git*\' -iname \'*.cpp\' \
...
@@ -808,7 +840,7 @@ pipeline {
...
@@ -808,7 +840,7 @@ pipeline {
--file-filter=*.cpp --force --enable=all --output-file=ck_cppcheck.log"
--file-filter=*.cpp --force --enable=all --output-file=ck_cppcheck.log"
}
}
steps
{
steps
{
buildHipClangJobAndReboot
(
setup_cmd:
""
,
build_cmd:
""
,
execute_cmd:
execute_cmd
,
no_reboot:
true
)
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
setup_cmd:
""
,
build_cmd:
""
,
execute_cmd:
execute_cmd
,
no_reboot:
true
)
archiveArtifacts
"build/ck_cppcheck.log"
archiveArtifacts
"build/ck_cppcheck.log"
cleanWs
()
cleanWs
()
}
}
...
@@ -820,6 +852,7 @@ pipeline {
...
@@ -820,6 +852,7 @@ pipeline {
}
}
agent
{
label
rocmnode
(
"nogpu"
)
}
agent
{
label
rocmnode
(
"nogpu"
)
}
environment
{
environment
{
setup_args
=
"NO_CK_BUILD"
execute_cmd
=
"find .. -not -path \'*.git*\' -iname \'*.h\' \
execute_cmd
=
"find .. -not -path \'*.git*\' -iname \'*.h\' \
-o -not -path \'*.git*\' -iname \'*.hpp\' \
-o -not -path \'*.git*\' -iname \'*.hpp\' \
-o -not -path \'*.git*\' -iname \'*.cpp\' \
-o -not -path \'*.git*\' -iname \'*.cpp\' \
...
@@ -831,7 +864,31 @@ pipeline {
...
@@ -831,7 +864,31 @@ pipeline {
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\'"
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\'"
}
}
steps
{
steps
{
buildHipClangJobAndReboot
(
setup_cmd:
""
,
build_cmd:
""
,
execute_cmd:
execute_cmd
,
no_reboot:
true
)
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
setup_cmd:
""
,
build_cmd:
""
,
execute_cmd:
execute_cmd
,
no_reboot:
true
)
cleanWs
()
}
}
}
}
stage
(
"Run Grouped Conv Large Case Tests"
)
{
parallel
{
stage
(
"Run Grouped Conv Large Case Tests on gfx90a"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_GROUPED_CONV_LARGE_CASES_TESTS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
execute_args
=
""" ../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 test_grouped_convnd_fwd_large_cases_xdl && \
./bin/test_grouped_convnd_fwd_large_cases_xdl"""
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
cleanWs
()
}
}
}
}
...
@@ -936,10 +993,10 @@ pipeline {
...
@@ -936,10 +993,10 @@ pipeline {
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
environment
{
setup_args
=
""" -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="
gfx1100;
gfx90a" -DCMAKE_CXX_FLAGS=" -O3 " """
setup_args
=
""" -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx90a" -DCMAKE_CXX_FLAGS=" -O3 " """
execute_args
=
""" cd ../client_example && rm -rf build && mkdir build && cd build && \
execute_args
=
""" cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
-DGPU_TARGETS="
gfx1100;
gfx90a" \
-DGPU_TARGETS="gfx90a" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
}
}
...
@@ -1043,7 +1100,7 @@ pipeline {
...
@@ -1043,7 +1100,7 @@ pipeline {
options
{
retry
(
1
)
}
options
{
retry
(
1
)
}
agent
{
label
rocmnode
(
"gfx90a"
)}
agent
{
label
rocmnode
(
"gfx90a"
)}
environment
{
environment
{
setup_args
=
"
"" -DGPU_TARGETS="gfx90a" -DBUILD_DEV=On ""
"
setup_args
=
"
NO_CK_BUILD
"
}
}
steps
{
steps
{
runPerfTest
(
setup_args:
setup_args
,
config_targets:
"ckProfiler"
,
no_reboot:
true
,
build_type:
'Release'
)
runPerfTest
(
setup_args:
setup_args
,
config_targets:
"ckProfiler"
,
no_reboot:
true
,
build_type:
'Release'
)
...
...
client_example/07_grouped_convnd_fwd/CMakeLists.txt
View file @
72c9f129
...
@@ -5,17 +5,17 @@ if(GPU_TARGETS MATCHES "gfx9")
...
@@ -5,17 +5,17 @@ if(GPU_TARGETS MATCHES "gfx9")
add_executable
(
client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp
)
add_executable
(
client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp
)
target_link_libraries
(
client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations
)
if
((
DTYPES MATCHES
"fp8"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp8"
)
OR
(
NOT DEFINED DTYPES
AND GPU_TARGETS MATCHES
"gfx94"
)
)
add_executable
(
client_grouped_conv3d_fwd_fp8 grouped_conv3d_fwd_fp8.cpp
)
add_executable
(
client_grouped_conv3d_fwd_fp8 grouped_conv3d_fwd_fp8.cpp
)
target_link_libraries
(
client_grouped_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
endif
()
if
((
DTYPES MATCHES
"bf8"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"bf8"
)
OR
(
NOT DEFINED DTYPES
AND GPU_TARGETS MATCHES
"gfx94"
)
)
add_executable
(
client_grouped_conv3d_fwd_bf8 grouped_conv3d_fwd_bf8.cpp
)
add_executable
(
client_grouped_conv3d_fwd_bf8 grouped_conv3d_fwd_bf8.cpp
)
target_link_libraries
(
client_grouped_conv3d_fwd_bf8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_fwd_bf8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
endif
()
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
)
OR
(
NOT DEFINED DTYPES
AND GPU_TARGETS MATCHES
"gfx94"
)
)
add_executable
(
client_grouped_conv3d_fwd_fp8_bf8 grouped_conv3d_fwd_fp8_bf8.cpp
)
add_executable
(
client_grouped_conv3d_fwd_fp8_bf8 grouped_conv3d_fwd_fp8_bf8.cpp
)
target_link_libraries
(
client_grouped_conv3d_fwd_fp8_bf8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_fwd_fp8_bf8 PRIVATE composable_kernel::device_conv_operations
)
...
...
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
View file @
72c9f129
...
@@ -4,5 +4,7 @@ target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::
...
@@ -4,5 +4,7 @@ target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::
add_executable
(
client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp
)
add_executable
(
client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp
)
target_link_libraries
(
client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp
)
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
)
OR
(
NOT DEFINED DTYPES AND GPU_TARGETS MATCHES
"gfx94"
))
target_link_libraries
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp
)
target_link_libraries
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
\ No newline at end of file
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
View file @
72c9f129
...
@@ -2,10 +2,13 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f
...
@@ -2,10 +2,13 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f
add_executable
(
client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
)
target_link_libraries
(
client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"bf8"
)
OR
(
NOT DEFINED DTYPES AND GPU_TARGETS MATCHES
"gfx94"
))
add_executable
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
\ No newline at end of file
client_example/16_convnd_fwd/CMakeLists.txt
View file @
72c9f129
...
@@ -4,7 +4,7 @@ if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
...
@@ -4,7 +4,7 @@ if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
endif
()
endif
()
if
((
DTYPES MATCHES
"fp8"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp8"
)
OR
(
NOT DEFINED DTYPES
AND GPU_TARGETS MATCHES
"gfx94"
)
)
add_executable
(
client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp
)
add_executable
(
client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations
)
endif
()
endif
()
...
...
client_example/20_splitk_gemm/CMakeLists.txt
View file @
72c9f129
if
(
GPU_TARGETS MATCHES
"gfx9"
AND
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
))
if
((
DTYPES MATCHES
"fp8"
AND DTYPES MATCHES
"fp16"
)
OR
(
NOT DEFINED DTYPES
AND GPU_TARGETS MATCHES
"gfx94"
))
add_executable
(
client_splitK_gemm splitK_gemm_fp16_f8.cpp
)
add_executable
(
client_splitK_gemm splitK_gemm_fp16_f8.cpp
)
target_link_libraries
(
client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations
)
target_link_libraries
(
client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations
)
endif
()
endif
()
client_example/24_grouped_conv_activation/CMakeLists.txt
View file @
72c9f129
if
(
GPU_TARGETS MATCHES
"gfx9"
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
# Fwd scaleadd scaleadd relu
# Fwd scaleadd scaleadd relu
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32
grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp
)
grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_conv_operations
)
...
@@ -36,7 +36,7 @@ add_executable(client_grouped_convnd_fwd_bilinear_residual_fp16
...
@@ -36,7 +36,7 @@ add_executable(client_grouped_convnd_fwd_bilinear_residual_fp16
grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp
)
grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_bilinear_residual_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_convnd_fwd_bilinear_residual_fp16 PRIVATE composable_kernel::device_conv_operations
)
# Fwd convinvscale
# Fwd convinvscale
add_executable
(
client_conv3d_fwd_convinvscale_fp8
add_executable
(
client_conv3d_fwd_convinvscale_fp8
grouped_convnd_fwd_convinvscale/conv3d_fwd_convinvscale_fp8.cpp
)
grouped_convnd_fwd_convinvscale/conv3d_fwd_convinvscale_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convinvscale_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_convinvscale_fp8 PRIVATE composable_kernel::device_conv_operations
)
# Fwd convscale + Bias
# Fwd convscale + Bias
...
@@ -47,6 +47,22 @@ target_link_libraries(client_conv3d_fwd_convscale_add_fp8 PRIVATE composable_ker
...
@@ -47,6 +47,22 @@ target_link_libraries(client_conv3d_fwd_convscale_add_fp8 PRIVATE composable_ker
add_executable
(
client_conv3d_fwd_convscale_relu_fp8
add_executable
(
client_conv3d_fwd_convscale_relu_fp8
grouped_convnd_fwd_convscale_relu/conv3d_fwd_convscale_relu_fp8.cpp
)
grouped_convnd_fwd_convscale_relu/conv3d_fwd_convscale_relu_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_relu_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_convscale_relu_fp8 PRIVATE composable_kernel::device_conv_operations
)
# Fwd convscale + ReLU + AMAX
add_executable
(
client_conv3d_fwd_convscale_relu_amax_fp8
grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_relu_amax_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_relu_amax_fp8
PRIVATE composable_kernel::device_conv_operations
composable_kernel::device_other_operations
composable_kernel::device_reduction_operations
utility
)
# Fwd convscale + AMAX
add_executable
(
client_conv3d_fwd_convscale_amax_fp8
grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_amax_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_amax_fp8
PRIVATE composable_kernel::device_conv_operations
composable_kernel::device_other_operations
composable_kernel::device_reduction_operations
utility
)
# Fwd convscale
# Fwd convscale
add_executable
(
client_conv3d_fwd_convscale_fp8
add_executable
(
client_conv3d_fwd_convscale_fp8
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_fp8.cpp
)
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_fp8.cpp
)
...
@@ -56,11 +72,11 @@ add_executable(client_conv3d_fwd_convscale_bf8
...
@@ -56,11 +72,11 @@ add_executable(client_conv3d_fwd_convscale_bf8
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_bf8.cpp
)
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_bf8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_bf8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_convscale_bf8 PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_conv3d_fwd_convscale_fp8_bf8
add_executable
(
client_conv3d_fwd_convscale_fp8_bf8
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_fp8_bf8.cpp
)
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_fp8_bf8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_fp8_bf8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_convscale_fp8_bf8 PRIVATE composable_kernel::device_conv_operations
)
add_executable
(
client_conv3d_fwd_convscale_bf8_fp8
add_executable
(
client_conv3d_fwd_convscale_bf8_fp8
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_bf8_fp8.cpp
)
grouped_convnd_fwd_convscale/conv3d_fwd_convscale_bf8_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_convscale_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_conv3d_fwd_convscale_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
# Bwd data bilinear
# Bwd data bilinear
...
...
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/common.hpp
0 → 100644
View file @
72c9f129
This diff is collapsed.
Click to expand it.
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_amax_fp8.cpp
0 → 100644
View file @
72c9f129
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
// Element types of the input and weight tensors.
using InDataType  = ck::f8_t;
using WeiDataType = ck::f8_t;

using CShuffleDataType = float;
using ConvOutDataType  = float;    // data type of convolution result
using OutDataType      = ck::f8_t; // data type of final result

// Compute types; main() forwards them as the trailing template arguments.
using AComputeDataType = ck::f8_t;
using BComputeDataType = ck::f8_t;

// Operation forwarded to the runner as its element-wise op.
// NOTE(review): ConvScale is not declared in this file — presumably it comes
// from common.hpp; confirm.
using ConvElementOp = ConvScale;

// Tensor layouts for input, weight and output.
using InLayout  = ck::tensor_layout::convolution::NDHWGC;
using WeiLayout = ck::tensor_layout::convolution::GKZYXC;
using OutLayout = ck::tensor_layout::convolution::NDHWGK;

// Reduction selected for this example.
constexpr auto ReduceOpId = ck::ReduceTensorOp::AMAX;

// Problem description. main() passes these to the runner grouped as
// {N, Di, Hi, Wi, G, C}, {G, K, Z, Y, X, C} and {N, Do, Ho, Wo, G, K}.
static constexpr ck::index_t NumDimSpatial = 3;
static constexpr ck::index_t G             = 1;
static constexpr ck::index_t N             = 64;
static constexpr ck::index_t K             = 128;
static constexpr ck::index_t C             = 64;

// Filter extents.
static constexpr ck::index_t Z = 3;
static constexpr ck::index_t Y = 3;
static constexpr ck::index_t X = 3;

// Input extents.
static constexpr ck::index_t Di = 28;
static constexpr ck::index_t Hi = 28;
static constexpr ck::index_t Wi = 3;

// Output extents (same values as the input extents above).
static constexpr ck::index_t Do = 28;
static constexpr ck::index_t Ho = 28;
static constexpr ck::index_t Wo = 3;
int
main
()
{
return
run_grouped_conv_fwd_convscale_reduce
<
NumDimSpatial
,
InDataType
,
WeiDataType
,
ConvOutDataType
,
OutDataType
,
ConvElementOp
,
ReduceOpId
,
InLayout
,
WeiLayout
,
OutLayout
,
3
,
AComputeDataType
,
BComputeDataType
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
}
client_example/24_grouped_conv_activation/grouped_convnd_fwd_convscale_reduce/conv3d_fwd_convscale_relu_amax_fp8.cpp
0 → 100644
View file @
72c9f129
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
using
InDataType
=
ck
::
f8_t
;
using
WeiDataType
=
ck
::
f8_t
;
using
CShuffleDataType
=
float
;
using
ConvOutDataType
=
float
;
// data type of convolution result
using
OutDataType
=
ck
::
f8_t
;
// data type of final result
using
AComputeDataType
=
ck
::
f8_t
;
using
BComputeDataType
=
ck
::
f8_t
;
using
ConvElementOp
=
ConvScaleRelu
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NDHWGK
;
constexpr
auto
ReduceOpId
=
ck
::
ReduceTensorOp
::
AMAX
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
3
;
static
constexpr
ck
::
index_t
G
=
1
;
static
constexpr
ck
::
index_t
N
=
64
;
static
constexpr
ck
::
index_t
K
=
128
;
static
constexpr
ck
::
index_t
C
=
64
;
static
constexpr
ck
::
index_t
Z
=
3
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Di
=
28
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Wi
=
3
;
static
constexpr
ck
::
index_t
Do
=
28
;
static
constexpr
ck
::
index_t
Ho
=
28
;
static
constexpr
ck
::
index_t
Wo
=
3
;
int
main
()
{
return
run_grouped_conv_fwd_convscale_reduce
<
NumDimSpatial
,
InDataType
,
WeiDataType
,
ConvOutDataType
,
OutDataType
,
ConvElementOp
,
ReduceOpId
,
InLayout
,
WeiLayout
,
OutLayout
,
3
,
AComputeDataType
,
BComputeDataType
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
}
client_example/CMakeLists.txt
View file @
72c9f129
...
@@ -34,8 +34,17 @@ if (DTYPES)
...
@@ -34,8 +34,17 @@ if (DTYPES)
endif
()
endif
()
message
(
"DTYPES macro set to
${
DTYPES
}
"
)
message
(
"DTYPES macro set to
${
DTYPES
}
"
)
else
()
else
()
add_definitions
(
-DCK_ENABLE_INT8 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16
)
add_definitions
(
-DCK_ENABLE_INT8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16
)
set
(
CK_ENABLE_ALL_DTYPES
"ON"
)
set
(
CK_ENABLE_INT8
"ON"
)
set
(
CK_ENABLE_FP16
"ON"
)
set
(
CK_ENABLE_FP32
"ON"
)
set
(
CK_ENABLE_FP64
"ON"
)
set
(
CK_ENABLE_BF16
"ON"
)
if
(
GPU_TARGETS MATCHES
"gfx94"
)
add_definitions
(
-DCK_ENABLE_FP8 -DCK_ENABLE_BF8
)
set
(
CK_ENABLE_FP8
"ON"
)
set
(
CK_ENABLE_BF8
"ON"
)
endif
()
endif
()
endif
()
if
(
GPU_TARGETS
)
if
(
GPU_TARGETS
)
...
...
codegen/CMakeLists.txt
View file @
72c9f129
...
@@ -27,6 +27,8 @@ file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
...
@@ -27,6 +27,8 @@ file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
add_embed_library
(
ck_headers
${
KERNEL_FILES
}
RELATIVE
${
CK_ROOT
}
/include
)
add_embed_library
(
ck_headers
${
KERNEL_FILES
}
RELATIVE
${
CK_ROOT
}
/include
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
##message(STATUS "SOURCE_FILES: ${SOURCES}")
# TODO: Use object library
# TODO: Use object library
add_library
(
ck_host STATIC
${
SOURCES
}
)
add_library
(
ck_host STATIC
${
SOURCES
}
)
target_link_libraries
(
ck_host PRIVATE ck_headers
)
target_link_libraries
(
ck_host PRIVATE ck_headers
)
...
@@ -48,6 +50,4 @@ rocm_install(
...
@@ -48,6 +50,4 @@ rocm_install(
)
)
rocm_install
(
DIRECTORY include/ck DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
rocm_install
(
DIRECTORY include/ck DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
add_subdirectory
(
test
)
endif
()
codegen/test/CMakeLists.txt
View file @
72c9f129
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
add_subdirectory
(
rtc
)
add_subdirectory
(
rtc
)
file
(
GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp
)
file
(
GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
if
(
NOT INSTANCES_ONLY
)
set_source_files_properties
(
${
TEST_SRC
}
PROPERTIES LANGUAGE HIP
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
set_source_files_properties
(
${
TEST_SRC
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
test_host_
${
BASE_NAME
}
${
TEST_SRC
}
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
add_dependencies
(
codegen test_host_
${
BASE_NAME
}
)
add_executable
(
codegen_test_
${
BASE_NAME
}
${
TEST_SRC
}
)
add_test
(
NAME codegen_test_
${
BASE_NAME
}
COMMAND test_host_
${
BASE_NAME
}
)
if
(
CK_USE_ALTERNATIVE_PYTHON
)
target_link_libraries
(
test_host_
${
BASE_NAME
}
ck_rtc ck_host
)
target_link_options
(
codegen_test_
${
BASE_NAME
}
PRIVATE -lstdc++fs
)
# target_link_libraries(test_host_${BASE_NAME} ${CK_ROOT}/build/lib/libutility.a)
endif
()
target_include_directories
(
test_host_
${
BASE_NAME
}
PUBLIC
include
())
add_dependencies
(
codegen codegen_test_
${
BASE_NAME
}
)
target_include_directories
(
test_host_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/include
)
add_dependencies
(
tests codegen_test_
${
BASE_NAME
}
)
target_include_directories
(
test_host_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/library/include
)
add_dependencies
(
check codegen_test_
${
BASE_NAME
}
)
endforeach
()
add_test
(
NAME codegen_test_
${
BASE_NAME
}
COMMAND codegen_test_
${
BASE_NAME
}
)
message
(
"adding test codegen_test_
${
BASE_NAME
}
"
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
ck_rtc ck_host
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/codegen/test/include
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/include
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/library/include
)
endforeach
()
endif
()
codegen/test/rtc/CMakeLists.txt
View file @
72c9f129
find_package
(
hip
)
file
(
GLOB RTC_SOURCES CONFIGURE_DEPENDS src/*.cpp
)
file
(
GLOB RTC_SOURCES CONFIGURE_DEPENDS src/*.cpp
)
add_library
(
ck_rtc
${
RTC_SOURCES
}
)
add_library
(
ck_rtc
${
RTC_SOURCES
}
)
target_include_directories
(
ck_rtc PUBLIC include
)
target_include_directories
(
ck_rtc PUBLIC include
)
...
...
codegen/test/rtc/include/rtc/compile_kernel.hpp
View file @
72c9f129
...
@@ -2,14 +2,14 @@
...
@@ -2,14 +2,14 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#include <rtc/kernel.hpp>
#include <rtc/kernel.hpp>
#include <filesystem>
#include <
ck/
filesystem
.hpp
>
#include <string>
#include <string>
namespace
rtc
{
namespace
rtc
{
struct
src_file
struct
src_file
{
{
std
::
filesystem
::
path
path
;
CK
::
fs
::
path
path
;
std
::
string_view
content
;
std
::
string_view
content
;
};
};
...
...
codegen/test/rtc/include/rtc/tmp_dir.hpp
View file @
72c9f129
...
@@ -2,13 +2,13 @@
...
@@ -2,13 +2,13 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_TMP_DIR
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_TMP_DIR
#include <string>
#include <string>
#include <filesystem>
#include <
ck/
filesystem
.hpp
>
namespace
rtc
{
namespace
rtc
{
struct
tmp_dir
struct
tmp_dir
{
{
std
::
filesystem
::
path
path
;
CK
::
fs
::
path
path
;
tmp_dir
(
const
std
::
string
&
prefix
=
""
);
tmp_dir
(
const
std
::
string
&
prefix
=
""
);
void
execute
(
const
std
::
string
&
cmd
)
const
;
void
execute
(
const
std
::
string
&
cmd
)
const
;
...
...
codegen/test/rtc/src/compile_kernel.cpp
View file @
72c9f129
...
@@ -70,9 +70,9 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
...
@@ -70,9 +70,9 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
for
(
const
auto
&
src
:
srcs
)
for
(
const
auto
&
src
:
srcs
)
{
{
std
::
filesystem
::
path
full_path
=
td
.
path
/
src
.
path
;
CK
::
fs
::
path
full_path
=
td
.
path
/
src
.
path
;
std
::
filesystem
::
path
parent_path
=
full_path
.
parent_path
();
CK
::
fs
::
path
parent_path
=
full_path
.
parent_path
();
std
::
filesystem
::
create_directories
(
parent_path
);
CK
::
fs
::
create_directories
(
parent_path
);
write_string
(
full_path
.
string
(),
src
.
content
);
write_string
(
full_path
.
string
(),
src
.
content
);
if
(
src
.
path
.
extension
().
string
()
==
".cpp"
)
if
(
src
.
path
.
extension
().
string
()
==
".cpp"
)
{
{
...
@@ -86,7 +86,7 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
...
@@ -86,7 +86,7 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
td
.
execute
(
compiler
()
+
options
.
flags
);
td
.
execute
(
compiler
()
+
options
.
flags
);
auto
out_path
=
td
.
path
/
out
;
auto
out_path
=
td
.
path
/
out
;
if
(
not
std
::
filesystem
::
exists
(
out_path
))
if
(
not
CK
::
fs
::
exists
(
out_path
))
throw
std
::
runtime_error
(
"Output file missing: "
+
out
);
throw
std
::
runtime_error
(
"Output file missing: "
+
out
);
auto
obj
=
read_buffer
(
out_path
.
string
());
auto
obj
=
read_buffer
(
out_path
.
string
());
...
...
codegen/test/rtc/src/tmp_dir.cpp
View file @
72c9f129
...
@@ -31,10 +31,10 @@ std::string unique_string(const std::string& prefix)
...
@@ -31,10 +31,10 @@ std::string unique_string(const std::string& prefix)
}
}
tmp_dir
::
tmp_dir
(
const
std
::
string
&
prefix
)
tmp_dir
::
tmp_dir
(
const
std
::
string
&
prefix
)
:
path
(
std
::
filesystem
::
temp_directory_path
()
/
:
path
(
CK
::
fs
::
temp_directory_path
()
/
unique_string
(
prefix
.
empty
()
?
"ck-rtc"
:
"ck-rtc-"
+
prefix
))
unique_string
(
prefix
.
empty
()
?
"ck-rtc"
:
"ck-rtc-"
+
prefix
))
{
{
std
::
filesystem
::
create_directories
(
this
->
path
);
CK
::
fs
::
create_directories
(
this
->
path
);
}
}
void
tmp_dir
::
execute
(
const
std
::
string
&
cmd
)
const
void
tmp_dir
::
execute
(
const
std
::
string
&
cmd
)
const
...
@@ -43,6 +43,6 @@ void tmp_dir::execute(const std::string& cmd) const
...
@@ -43,6 +43,6 @@ void tmp_dir::execute(const std::string& cmd) const
std
::
system
(
s
.
c_str
());
std
::
system
(
s
.
c_str
());
}
}
tmp_dir
::~
tmp_dir
()
{
std
::
filesystem
::
remove_all
(
this
->
path
);
}
tmp_dir
::~
tmp_dir
()
{
CK
::
fs
::
remove_all
(
this
->
path
);
}
}
// namespace rtc
}
// namespace rtc
docs/sphinx/requirements.in
View file @
72c9f129
rocm-docs-core==1.
6
.2
rocm-docs-core==1.
7
.2
sphinxcontrib-bibtex==2.6.2
sphinxcontrib-bibtex==2.6.2
Prev
1
2
3
4
5
…
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment