Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
63b152d6
"vscode:/vscode.git/clone" did not exist on "0d26477a864fcaf6f1dfe709c9f8421e5305b933"
Commit
63b152d6
authored
Oct 17, 2024
by
danyao12
Browse files
Merge branch 'develop' into ck_tile/fa_bwd_v3
parents
ae2d7d2b
14c3cfb1
Changes
132
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
379 additions
and
192 deletions
+379
-192
CMakeLists.txt
CMakeLists.txt
+50
-65
Jenkinsfile
Jenkinsfile
+139
-39
README.md
README.md
+7
-5
cmake/Embed.cmake
cmake/Embed.cmake
+3
-1
codegen/CMakeLists.txt
codegen/CMakeLists.txt
+31
-26
codegen/test/CMakeLists.txt
codegen/test/CMakeLists.txt
+20
-17
codegen/test/include/common.hpp
codegen/test/include/common.hpp
+0
-0
codegen/test/rtc/CMakeLists.txt
codegen/test/rtc/CMakeLists.txt
+2
-0
codegen/test/rtc/include/rtc/compile_kernel.hpp
codegen/test/rtc/include/rtc/compile_kernel.hpp
+2
-2
codegen/test/rtc/include/rtc/filesystem.hpp
codegen/test/rtc/include/rtc/filesystem.hpp
+60
-0
codegen/test/rtc/include/rtc/tmp_dir.hpp
codegen/test/rtc/include/rtc/tmp_dir.hpp
+2
-2
codegen/test/rtc/src/compile_kernel.cpp
codegen/test/rtc/src/compile_kernel.cpp
+5
-5
codegen/test/rtc/src/tmp_dir.cpp
codegen/test/rtc/src/tmp_dir.cpp
+3
-3
docs/reference/API_Reference_Guide.rst
docs/reference/API_Reference_Guide.rst
+0
-6
docs/sphinx/requirements.in
docs/sphinx/requirements.in
+1
-1
docs/sphinx/requirements.txt
docs/sphinx/requirements.txt
+1
-1
example/01_gemm/common.hpp
example/01_gemm/common.hpp
+17
-16
example/01_gemm/gemm_dl_fp16.cpp
example/01_gemm/gemm_dl_fp16.cpp
+12
-1
example/01_gemm/gemm_dl_fp32.cpp
example/01_gemm/gemm_dl_fp32.cpp
+12
-1
example/01_gemm/gemm_dl_int8.cpp
example/01_gemm/gemm_dl_int8.cpp
+12
-1
No files found.
CMakeLists.txt
View file @
63b152d6
...
...
@@ -97,10 +97,9 @@ if(DL_KERNELS)
add_definitions
(
-DDL_KERNELS
)
set
(
CK_ENABLE_DL_KERNELS
"ON"
)
endif
()
if
(
INSTANCES_ONLY
)
add_definitions
(
-DINSTANCES_ONLY
)
set
(
CK_ENABLE_INSTANCES_ONLY
"ON"
)
option
(
CK_USE_CODEGEN
"Enable codegen library"
OFF
)
if
(
CK_USE_CODEGEN
)
add_definitions
(
-DCK_USE_CODEGEN
)
endif
()
include
(
getopt
)
...
...
@@ -127,7 +126,17 @@ rocm_setup_version(VERSION ${version})
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
"$ENV{ROCM_PATH}"
"$ENV{HIP_PATH}"
)
message
(
"GPU_TARGETS=
${
GPU_TARGETS
}
"
)
message
(
"GPU_ARCHS=
${
GPU_ARCHS
}
"
)
if
(
GPU_ARCHS
)
#disable GPU_TARGETS to avoid conflicts, this needs to happen before we call hip package
unset
(
GPU_TARGETS CACHE
)
unset
(
AMDGPU_TARGETS CACHE
)
endif
()
if
(
GPU_TARGETS
)
set
(
USER_GPU_TARGETS 1
)
else
()
set
(
USER_GPU_TARGETS 0
)
endif
()
find_package
(
hip
)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
# SWDEV-413293 and https://reviews.llvm.org/D155213
...
...
@@ -135,56 +144,39 @@ math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR})
message
(
"hip_version_flat=
${
hip_VERSION_FLAT
}
"
)
message
(
"checking which targets are supported"
)
#This is the list of targets to be used in case GPU_TARGETS is not set on command line
#These targets will be filtered and only supported ones will be used
#Setting GPU_TARGETS on command line will override this list
if
(
NOT PROFILER_ONLY
)
if
(
NOT ENABLE_ASAN_PACKAGING
)
#build CK for all supported targets
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
)
else
()
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
)
endif
()
#In order to build just the CK library (without tests and examples) for all supported GPU targets
#use -D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
#the GPU_TARGETS flag will be reset in this case in order to avoid conflicts.
#
#In order to build CK along with all tests and examples it should be OK to set GPU_TARGETS to just 1 or 2 similar architectures.
if
(
NOT ENABLE_ASAN_PACKAGING
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
set
(
CK_GPU_TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
)
else
()
#build CK only for xnack-supported targets
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+"
)
set
(
GPU_TARGETS
"
${
DEFAULT_GPU_TARGETS
}
"
CACHE STRING
" "
FORCE
)
set
(
CK_GPU_TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
)
endif
()
else
()
add_definitions
(
-DPROFILER_ONLY
)
set
(
GPU_TARGETS
""
CACHE STRING
""
FORCE
)
if
(
GPU_TARGETS
)
message
(
FATAL_ERROR
"For PROFILE_ONLY build, please do not set GPU_TARGETS, use GPU_ARCH = gfx90, gfx94, gfx10, gfx11 or gfx12"
)
endif
()
if
(
GPU_ARCH MATCHES
"gfx90"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx908;gfx90a"
)
elseif
(
GPU_ARCH MATCHES
"gfx94"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx940;gfx941;gfx942"
)
elseif
(
GPU_ARCH MATCHES
"gfx10"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1030"
)
elseif
(
GPU_ARCH MATCHES
"gfx11"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1100;gfx1101;gfx1102"
)
elseif
(
GPU_ARCH MATCHES
"gfx12"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1200;gfx1201"
)
else
()
message
(
FATAL_ERROR
"For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12"
)
endif
()
set
(
GPU_TARGETS
"
${
DEFAULT_GPU_TARGETS
}
"
CACHE STRING
" "
FORCE
)
#build CK only for xnack-supported targets when using ASAN
set
(
CK_GPU_TARGETS
"gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+"
)
endif
()
message
(
"Supported
GPU_
T
AR
GETS=
${
DEFAULT_GPU_TARGETS
}
"
)
if
(
GPU_
T
AR
GET
S
)
message
(
"Building CK for the following targets:
${
GPU_TARGETS
}
"
)
#if user set
GPU_AR
CHS on the cmake command line, overwrite default target list with user's list
#otherwise, if user set GPU_TARGETS, use that set of targets
if
(
GPU_AR
CH
S
)
set
(
CK_GPU_TARGETS
${
GPU_ARCHS
}
)
else
()
message
(
"Building CK for the default targets:
${
DEFAULT_GPU_TARGETS
}
"
)
if
(
USER_GPU_TARGETS
)
set
(
CK_GPU_TARGETS
${
GPU_TARGETS
}
)
endif
()
endif
()
#make sure all the targets on the list are actually supported by the current compiler
rocm_check_target_ids
(
SUPPORTED_GPU_TARGETS
TARGETS
${
CK_GPU_TARGETS
}
)
message
(
"Building CK for the following targets:
${
SUPPORTED_GPU_TARGETS
}
"
)
if
(
GPU_TARGETS
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_definitions
(
-DCK_USE_XDL
)
...
...
@@ -557,8 +549,7 @@ ENDFOREACH()
add_custom_target
(
instances DEPENDS utility;
${
CK_DEVICE_INSTANCES
}
SOURCES
${
INSTANCE_FILES
}
)
add_subdirectory
(
library
)
if
(
NOT DEFINED INSTANCES_ONLY
)
if
(
NOT DEFINED PROFILER_ONLY
)
if
(
NOT GPU_ARCHS AND USER_GPU_TARGETS
)
rocm_package_setup_component
(
tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests
# Prevent -static suffix on package name
...
...
@@ -569,24 +560,18 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples
)
add_subdirectory
(
example
)
add_subdirectory
(
test
)
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory
(
profiler
)
else
()
#When building PROFILER_ONLY, label the package with GPU_ARCH
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler_
${
GPU_ARCH
}
)
add_subdirectory
(
profiler
)
endif
()
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
endif
()
if
(
NOT DEFINED PROFILER_ONLY
AND
(
GPU_TARGETS MATCHES
"gfx9"
OR DEFINED INSTANCES_ONLY
))
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory
(
profiler
)
if
(
CK_USE_CODEGEN
AND
(
GPU_TARGETS MATCHES
"gfx9"
OR GPU_ARCHS
))
add_subdirectory
(
codegen
)
endif
()
...
...
Jenkinsfile
View file @
63b152d6
...
...
@@ -100,7 +100,15 @@ def getDockerImage(Map conf=[:]){
dockerArgs
=
dockerArgs
+
" --no-cache "
}
echo
"Docker Args: ${dockerArgs}"
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
//Check if image exists
def
retimage
try
...
...
@@ -125,7 +133,9 @@ def buildDocker(install_prefix){
def
image_name
=
getDockerImageName
()
echo
"Building Docker for ${image_name}"
def
dockerArgs
=
"--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' "
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerArgs
=
dockerArgs
+
" --no-cache "
}
echo
"Build Args: ${dockerArgs}"
try
{
if
(
params
.
BUILD_DOCKER
){
...
...
@@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){
"""
)
sh
cmd3
}
// reduce parallelism when compiling, clang uses too much memory
def
nt
=
nthreads
()
def
cmd
...
...
@@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){
}
else
{
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs}
dumb-init
make -j${nt} ${config_targets}"
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} make -j${nt} ${config_targets}"
)
}
cmd
=
conf
.
get
(
"cmd"
,
"""
${setup_cmd}
...
...
@@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){
dir
(
"build"
){
//build CK
sh
cmd
//run tests
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
//run tests
except when NO_CK_BUILD or BUILD_LEGACY_OS are set
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)
&&
!
params
.
BUILD_LEGACY_OS
){
if
(
setup_args
.
contains
(
"gfx90a"
)
&&
params
.
NINJA_BUILD_TRACE
){
sh
"/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json"
archiveArtifacts
"ck_build_trace.json"
...
...
@@ -309,7 +320,7 @@ def cmake_build(Map conf=[:]){
if
(
package_build
==
true
&&
(
env
.
BRANCH_NAME
==
"develop"
||
env
.
BRANCH_NAME
==
"amd-master"
))
{
archiveArtifacts
artifacts:
"build/*.deb"
,
allowEmptyArchive:
true
,
fingerprint:
true
}
if
(
params
.
RUN_CK_TILE_TESTS
){
if
(
params
.
RUN_CK_TILE_
FMHA_
TESTS
){
try
{
archiveArtifacts
"perf_fmha_fwd_*.log"
archiveArtifacts
"perf_fmha_bwd_*.log"
...
...
@@ -330,11 +341,19 @@ def buildHipClangJob(Map conf=[:]){
env
.
HSA_ENABLE_SDMA
=
0
checkout
scm
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
def
dockerOpts
=
"
--rm
--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
def
dockerOpts
=
"--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if
(
conf
.
get
(
"enforce_xnack_on"
,
false
))
{
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
...
...
@@ -352,7 +371,7 @@ def buildHipClangJob(Map conf=[:]){
def
retimage
(
retimage
,
image
)
=
getDockerImage
(
conf
)
gitStatusWrapper
(
credentialsId:
"${
status_wrapper
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
gitStatusWrapper
(
credentialsId:
"${
env.ck_git
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
48
,
unit:
'HOURS'
)
{
...
...
@@ -393,7 +412,7 @@ def runCKProfiler(Map conf=[:]){
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
def
dockerOpts
=
"
--rm
--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
def
dockerOpts
=
"--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if
(
conf
.
get
(
"enforce_xnack_on"
,
false
))
{
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
...
...
@@ -407,7 +426,7 @@ def runCKProfiler(Map conf=[:]){
def
variant
=
env
.
STAGE_NAME
def
retimage
gitStatusWrapper
(
credentialsId:
"${
status_wrapper
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
gitStatusWrapper
(
credentialsId:
"${
env.ck_git
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
...
...
@@ -512,11 +531,20 @@ def Build_CK(Map conf=[:]){
env
.
DOCKER_BUILDKIT
=
1
checkout
scm
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
def
dockerOpts
=
"
--rm
--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
def
dockerOpts
=
"--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if
(
conf
.
get
(
"enforce_xnack_on"
,
false
))
{
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
...
...
@@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
if
(
params
.
BUILD_LEGACY_OS
){
dockerOpts
=
dockerOpts
+
" --env LD_LIBRARY_PATH='/opt/Python-3.8.13/lib' "
}
def
video_id
=
sh
(
returnStdout:
true
,
script:
'getent group video | cut -d: -f3'
)
def
render_id
=
sh
(
returnStdout:
true
,
script:
'getent group render | cut -d: -f3'
)
dockerOpts
=
dockerOpts
+
" --group-add=${video_id} --group-add=${render_id} "
...
...
@@ -532,7 +563,7 @@ def Build_CK(Map conf=[:]){
def
variant
=
env
.
STAGE_NAME
def
retimage
gitStatusWrapper
(
credentialsId:
"${env.
status_wrapper
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
gitStatusWrapper
(
credentialsId:
"${env.
ck_git
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
...
...
@@ -629,7 +660,7 @@ def process_results(Map conf=[:]){
def
prefixpath
=
"/opt/rocm"
// Jenkins is complaining about the render group
def
dockerOpts
=
"
--rm
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
def
dockerOpts
=
"--cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if
(
conf
.
get
(
"enforce_xnack_on"
,
false
))
{
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
...
...
@@ -637,7 +668,7 @@ def process_results(Map conf=[:]){
def
variant
=
env
.
STAGE_NAME
def
retimage
gitStatusWrapper
(
credentialsId:
"${env.
status_wrapper
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
gitStatusWrapper
(
credentialsId:
"${env.
ck_git
_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
}
...
...
@@ -651,7 +682,7 @@ def process_results(Map conf=[:]){
timeout
(
time:
1
,
unit:
'HOURS'
){
try
{
dir
(
"script"
){
if
(
params
.
RUN_CK_TILE_TESTS
){
if
(
params
.
RUN_CK_TILE_
FMHA_
TESTS
){
try
{
unstash
"perf_fmha_fwd_gfx942.log"
unstash
"perf_fmha_bwd_gfx942.log"
...
...
@@ -704,10 +735,11 @@ def process_results(Map conf=[:]){
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
CRON_SETTINGS
=
BRANCH_NAME
==
"develop"
?
'''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.2;RUN_CK_TILE_FMHA_TESTS=true;RUN_CK_TILE_GEMM_TESTS=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
;RUN_CODEGEN_TESTS=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false'''
:
""
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false
0 13 * * * % BUILD_LEGACY_OS=true'''
:
""
pipeline
{
agent
none
...
...
@@ -774,6 +806,10 @@ pipeline {
name:
"RUN_GROUPED_CONV_LARGE_CASES_TESTS"
,
defaultValue:
false
,
description:
"Run the grouped conv large cases tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CODEGEN_TESTS"
,
defaultValue:
false
,
description:
"Run codegen tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CK_TILE_FMHA_TESTS"
,
defaultValue:
false
,
...
...
@@ -794,6 +830,10 @@ pipeline {
name:
"NINJA_BUILD_TRACE"
,
defaultValue:
false
,
description:
"Generate a ninja build trace (default: OFF)"
)
booleanParam
(
name:
"BUILD_LEGACY_OS"
,
defaultValue:
false
,
description:
"Try building CK with legacy OS dockers: RHEL8 and SLES15 (default: OFF)"
)
}
environment
{
dbuser
=
"${dbuser}"
...
...
@@ -802,7 +842,7 @@ pipeline {
dbsshport
=
"${dbsshport}"
dbsshuser
=
"${dbsshuser}"
dbsshpassword
=
"${dbsshpassword}"
status_wrapper_creds
=
"${status_wrapper
_creds}"
ck_git_creds
=
"${ck_git
_creds}"
gerrit_cred
=
"${gerrit_cred}"
DOCKER_BUILDKIT
=
"1"
}
...
...
@@ -890,7 +930,30 @@ pipeline {
execute_args
=
""" ../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 test_grouped_convnd_fwd_large_cases_xdl && \
./bin/test_grouped_convnd_fwd_large_cases_xdl"""
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
}
}
stage
(
"Run Codegen Tests"
)
{
parallel
{
stage
(
"Run Codegen Tests on gfx90a"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_CODEGEN_TESTS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
execute_args
=
""" CXX=/opt/rocm/llvm/bin/clang++ cmake ../codegen && \
make -j64 check"""
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -915,7 +978,7 @@ pipeline {
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ &&
example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -934,7 +997,7 @@ pipeline {
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ &&
example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -946,7 +1009,6 @@ pipeline {
{
parallel
{
stage
(
"Run CK_TILE_GEMM Tests on gfx90a"
)
{
when
{
...
...
@@ -960,12 +1022,11 @@ pipeline {
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
stage
(
"Run CK_TILE_GEMM Tests on gfx942"
)
{
...
...
@@ -980,7 +1041,7 @@ pipeline {
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -988,15 +1049,54 @@ pipeline {
}
}
}
stage
(
"Build CK and run Tests"
)
{
parallel
{
stage
(
"Build CK with RHEL8"
)
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
def
docker_name
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_rhel8_rocm6.3"
setup_args
=
""" -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
}
}
stage
(
"Build CK with SLES15"
)
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
def
docker_name
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_sles15_rocm6.3"
setup_args
=
""" -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
}
}
stage
(
"Build CK for all gfx9 targets"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
...
...
@@ -1018,7 +1118,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx942"
)
}
environment
{
...
...
@@ -1038,7 +1138,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
...
...
@@ -1058,16 +1158,16 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
execute_args
=
""" cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER="${build_compiler()}" \
-D CMAKE_BUILD_TYPE=Release \
-D
INSTANCES_ONLY=ON
\
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
}
-D
GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
\
-D
CMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
}
steps
{
buildHipClangJobAndReboot
(
setup_cmd:
""
,
build_cmd:
""
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -1077,7 +1177,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1030"
)
}
environment
{
...
...
@@ -1097,7 +1197,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1101"
)
}
environment
{
...
...
@@ -1117,7 +1217,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_GFX12
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
params
.
BUILD_GFX12
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1201"
)
}
environment
{
...
...
@@ -1144,7 +1244,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
}
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
options
{
retry
(
1
)
}
agent
{
label
rocmnode
(
"gfx90a"
)}
...
...
@@ -1165,7 +1265,7 @@ pipeline {
stage
(
"Process results"
){
when
{
beforeAgent
true
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
}
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
'mici'
}
steps
{
...
...
README.md
View file @
63b152d6
...
...
@@ -90,7 +90,13 @@ Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composa
```
If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets
supported by the current compiler (this may take a long time).
supported by the current compiler (this may take a long time).
Tests and examples are built only if `GPU_TARGETS` is set by the user on the cmake command line.
NOTE: If you set `GPU_TARGETS` to a list of architectures, the build will only work if the
architectures are similar, e.g., `gfx908;gfx90a`, or `gfx1100;gfx1101;gfx1102`. Otherwise, if you
want to build the library for a list of different architectures,
you should use the `GPU_ARCHS` build argument, for example `GPU_ARCHS=gfx908;gfx1030;gfx1100;gfx942`.
4.
Build the entire CK library:
...
...
@@ -137,10 +143,6 @@ crash. In such cases, you can reduce the number of threads to 32 by using `-j32`
Additional cmake flags can be used to significantly speed-up the build:
*
`INSTANCES_ONLY`
(default is OFF) must be set to ON in order to build only the instances and library
while skipping all tests, examples, and profiler. This is useful in cases when you plan to use CK as a
dependency and don't plan to run any examples or tests.
*
`DTYPES`
(default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build
instances of select data types only. The main default data types are fp32 and fp16; you can safely skip
other data types.
...
...
cmake/Embed.cmake
View file @
63b152d6
...
...
@@ -233,6 +233,8 @@ function(add_embed_library EMBED_NAME)
else
()
target_sources
(
${
EMBED_NAME
}
INTERFACE $<TARGET_OBJECTS:
${
INTERNAL_EMBED_LIB
}
>
)
endif
()
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
"
${
EMBED_DIR
}
/include"
)
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
$<BUILD_INTERFACE:
${
EMBED_DIR
}
/include>
$<INSTALL_INTERFACE:include/ck>
)
endfunction
()
codegen/CMakeLists.txt
View file @
63b152d6
cmake_minimum_required
(
VERSION 3.16
)
project
(
composable_kernel_host
)
set
(
CMAKE_EXPORT_COMPILE_COMMANDS ON
)
set
(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${
CMAKE_BINARY_DIR
}
/lib
)
...
...
@@ -5,49 +8,51 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_BINARY_DIR
}
/bin
)
set
(
CK_ROOT
${
CMAKE_CURRENT_SOURCE_DIR
}
/..
)
add_compile_options
(
-std=c++17
)
f
in
d_package
(
hip
)
add_custom_target
(
codegen
)
find_package
(
ROCM
)
in
clude
(
ROCMInstallTargets
)
include
(
ROCMTest
)
# add include directories
include_directories
(
BEFORE
${
PROJECT_BINARY_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/library/include
${
HIP_INCLUDE_DIRS
}
)
rocm_setup_version
(
VERSION 1.0
)
list
(
APPEND CMAKE_MODULE_PATH
${
CK_ROOT
}
/cmake
)
include
(
Embed
)
file
(
GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
${
CK_ROOT
}
/include/ck/*.hpp
)
#printouts for debug purposes
#message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
#message(STATUS "RELATIVE: ${CK_ROOT}/include")
${
CK_ROOT
}
/include/ck/*.hpp
)
#
printouts for debug purposes
#
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
#
message(STATUS "RELATIVE: ${CK_ROOT}/include")
add_embed_library
(
ck_headers
${
KERNEL_FILES
}
RELATIVE
${
CK_ROOT
}
/include
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
add_compile_options
(
-std=c++17
)
##message(STATUS "SOURCE_FILES: ${SOURCES}"
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
# TODO: Use object library
add_library
(
ck_host STATIC
${
SOURCES
}
)
target_link_libraries
(
ck_host PRIVATE ck_headers
)
set_target_properties
(
ck_host PROPERTIES
LINKER_LANGUAGE CXX
POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
ck_host PROPERTIES
LINKER_LANGUAGE CXX
POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
ck_host PUBLIC
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
#
target_include_directories(ck_host PUBLIC
#
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
#
)
add_executable
(
ck-template-driver driver/main.cpp
)
target_link_libraries
(
ck-template-driver ck_host
)
rocm_install
(
rocm_install
_targets
(
TARGETS ck_host ck_headers
EXPORT ck_hostTargets
EXPORT ck_host_targets
INCLUDE include
PRIVATE
)
rocm_install
(
DIRECTORY include/ck DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
rocm_export_targets
(
EXPORT ck_host_targets
NAMESPACE composable_kernel::
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
add_subdirectory
(
test
)
codegen/test/CMakeLists.txt
View file @
63b152d6
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
add_subdirectory
(
rtc
)
file
(
GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp
)
if
(
NOT INSTANCES_ONLY
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
set_source_files_properties
(
${
TEST_SRC
}
PROPERTIES LANGUAGE HIP
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
add_executable
(
codegen_test_
${
BASE_NAME
}
${
TEST_SRC
}
)
if
(
CK_USE_ALTERNATIVE_PYTHON
)
target_link_options
(
codegen_test_
${
BASE_NAME
}
PRIVATE -lstdc++fs
)
endif
()
add_dependencies
(
codegen codegen_test_
${
BASE_NAME
}
)
add_dependencies
(
tests codegen_test_
${
BASE_NAME
}
)
add_dependencies
(
check codegen_test_
${
BASE_NAME
}
)
add_test
(
NAME codegen_test_
${
BASE_NAME
}
COMMAND codegen_test_
${
BASE_NAME
}
)
message
(
"adding test codegen_test_
${
BASE_NAME
}
"
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
ck_rtc ck_host
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/codegen/test/include
)
# TODO: These tests need to be refactored to remove dependency on main ck
# headers and device compilation.
set
(
TESTS_REQUIRE_DEVICE_COMPILE
grouped_conv_fwd_multiple_d_v1
grouped_conv_fwd_multiple_d_v2
grouped_conv_fwd_multiple_d_v3
grouped_conv_fwd_multiple_d_v4
)
find_package
(
hip
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
rocm_add_test_executable
(
codegen_test_
${
BASE_NAME
}
${
TEST_SRC
}
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
ck_rtc ck_host
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC include
)
if
(
BASE_NAME IN_LIST TESTS_REQUIRE_DEVICE_COMPILE
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
hip::device
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/include
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/library/include
)
endf
oreach
()
end
i
f
()
end
i
f
()
endf
oreach
()
codegen/test/common.hpp
→
codegen/test/
include/
common.hpp
View file @
63b152d6
File moved
codegen/test/rtc/CMakeLists.txt
View file @
63b152d6
# Build script for the ck_rtc library used by the codegen tests.
# Locate the HIP package; its hip::host target is linked into ck_rtc below.
find_package
(
hip
)
# Collect every .cpp file under src/ into RTC_SOURCES.
file
(
GLOB RTC_SOURCES CONFIGURE_DEPENDS src/*.cpp
)
# Define the ck_rtc library from the globbed sources (no STATIC/SHARED keyword is given).
add_library
(
ck_rtc
${
RTC_SOURCES
}
)
# PUBLIC: the include/ directory is used by ck_rtc itself and by targets that link it.
target_include_directories
(
ck_rtc PUBLIC include
)
# Link the HIP host-side target, propagated to consumers of ck_rtc.
target_link_libraries
(
ck_rtc PUBLIC hip::host
)
# NOTE(review): -lstdc++fs is linked unconditionally and propagated to consumers;
# presumably it is needed when rtc/filesystem.hpp falls back to
# <experimental/filesystem> -- confirm it is available on every supported toolchain.
target_link_libraries
(
ck_rtc PUBLIC -lstdc++fs
)
codegen/test/rtc/include/rtc/compile_kernel.hpp
View file @
63b152d6
...
...
@@ -2,14 +2,14 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#include <rtc/kernel.hpp>
#include <c
k
/filesystem.hpp>
#include <
rt
c/filesystem.hpp>
#include <string>
namespace
rtc
{
struct
src_file
{
CK
::
fs
::
path
path
;
fs
::
path
path
;
std
::
string_view
content
;
};
...
...
codegen/test/rtc/include/rtc/filesystem.hpp
0 → 100644
View file @
63b152d6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Selects a filesystem implementation and exposes it as the namespace alias
// rtc::fs: std::filesystem when <filesystem> is usable, otherwise
// std::experimental::filesystem, otherwise compilation stops with an #error.
#ifndef GUARD_TEST_HOST_RTC_FILESYSTEM_HPP
#define GUARD_TEST_HOST_RTC_FILESYSTEM_HPP
// NOTE(review): nothing in this header uses <string>/<string_view> itself;
// presumably they are kept as a convenience for includers - confirm.
#include <string>
#include <string_view>
// Detection ladder. Every branch defines BOTH macros below to 0 or 1:
//   RTC_HAS_FILESYSTEM    - the standard <filesystem> header can be used
//   RTC_HAS_FILESYSTEM_TS - the <experimental/filesystem> header can be used
// clang-format off
// Static analysis with cppcheck: report both implementations as available.
#if defined(CPPCHECK)
#define RTC_HAS_FILESYSTEM 1
#define RTC_HAS_FILESYSTEM_TS 1
// Windows: the decision is keyed on the MSVC compiler version only.
#elif defined(_WIN32)
// _MSC_VER 1920 and newer (Visual Studio 2019+): standard <filesystem>.
#if _MSC_VER >= 1920
#define RTC_HAS_FILESYSTEM 1
#define RTC_HAS_FILESYSTEM_TS 0
// _MSC_VER 1900..1919 (Visual Studio 2015/2017): experimental header only.
#elif _MSC_VER >= 1900
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 1
// Anything older (or _MSC_VER undefined, which evaluates as 0): neither.
#else
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 0
#endif
// Other platforms: probe for the headers when the compiler offers __has_include.
#elif defined(__has_include)
// <filesystem> additionally requires the translation unit to be C++17 or newer.
#if __has_include(<filesystem>) && __cplusplus >= 201703L
#define RTC_HAS_FILESYSTEM 1
#else
#define RTC_HAS_FILESYSTEM 0
#endif
// <experimental/filesystem> is accepted from C++11 onwards.
#if __has_include(<experimental/filesystem>) && __cplusplus >= 201103L
#define RTC_HAS_FILESYSTEM_TS 1
#else
#define RTC_HAS_FILESYSTEM_TS 0
#endif
// No way to probe for headers: assume neither implementation is available.
#else
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 0
#endif
// clang-format on
// Include exactly one implementation; the standard header wins when both are
// available, and having neither is a hard compile error.
#if RTC_HAS_FILESYSTEM
#include <filesystem>
#elif RTC_HAS_FILESYSTEM_TS
#include <experimental/filesystem>
#else
#error "No filesystem include available"
#endif
namespace
rtc
{
// rtc::fs names whichever implementation was included above, using the same
// preference order as the #include selection.
#if RTC_HAS_FILESYSTEM
namespace
fs
=
::
std
::
filesystem
;
#elif RTC_HAS_FILESYSTEM_TS
namespace
fs
=
::
std
::
experimental
::
filesystem
;
#endif
}
// namespace rtc
#endif // GUARD_TEST_HOST_RTC_FILESYSTEM_HPP
codegen/test/rtc/include/rtc/tmp_dir.hpp
View file @
63b152d6
...
...
@@ -2,13 +2,13 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_TMP_DIR
#include <string>
#include <c
k
/filesystem.hpp>
#include <
rt
c/filesystem.hpp>
namespace
rtc
{
struct
tmp_dir
{
CK
::
fs
::
path
path
;
fs
::
path
path
;
tmp_dir
(
const
std
::
string
&
prefix
=
""
);
void
execute
(
const
std
::
string
&
cmd
)
const
;
...
...
codegen/test/rtc/src/compile_kernel.cpp
View file @
63b152d6
#include
"
rtc/hip.hpp
"
#include
<
rtc/hip.hpp
>
#include <rtc/compile_kernel.hpp>
#include <rtc/tmp_dir.hpp>
#include <stdexcept>
...
...
@@ -70,9 +70,9 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
for
(
const
auto
&
src
:
srcs
)
{
CK
::
fs
::
path
full_path
=
td
.
path
/
src
.
path
;
CK
::
fs
::
path
parent_path
=
full_path
.
parent_path
();
CK
::
fs
::
create_directories
(
parent_path
);
fs
::
path
full_path
=
td
.
path
/
src
.
path
;
fs
::
path
parent_path
=
full_path
.
parent_path
();
fs
::
create_directories
(
parent_path
);
write_string
(
full_path
.
string
(),
src
.
content
);
if
(
src
.
path
.
extension
().
string
()
==
".cpp"
)
{
...
...
@@ -86,7 +86,7 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
td
.
execute
(
compiler
()
+
options
.
flags
);
auto
out_path
=
td
.
path
/
out
;
if
(
not
CK
::
fs
::
exists
(
out_path
))
if
(
not
fs
::
exists
(
out_path
))
throw
std
::
runtime_error
(
"Output file missing: "
+
out
);
auto
obj
=
read_buffer
(
out_path
.
string
());
...
...
codegen/test/rtc/src/tmp_dir.cpp
View file @
63b152d6
...
...
@@ -31,10 +31,10 @@ std::string unique_string(const std::string& prefix)
}
tmp_dir
::
tmp_dir
(
const
std
::
string
&
prefix
)
:
path
(
CK
::
fs
::
temp_directory_path
()
/
:
path
(
fs
::
temp_directory_path
()
/
unique_string
(
prefix
.
empty
()
?
"ck-rtc"
:
"ck-rtc-"
+
prefix
))
{
CK
::
fs
::
create_directories
(
this
->
path
);
fs
::
create_directories
(
this
->
path
);
}
void
tmp_dir
::
execute
(
const
std
::
string
&
cmd
)
const
...
...
@@ -43,6 +43,6 @@ void tmp_dir::execute(const std::string& cmd) const
std
::
system
(
s
.
c_str
());
}
tmp_dir
::~
tmp_dir
()
{
CK
::
fs
::
remove_all
(
this
->
path
);
}
tmp_dir
::~
tmp_dir
()
{
fs
::
remove_all
(
this
->
path
);
}
}
// namespace rtc
docs/reference/API_Reference_Guide.rst
View file @
63b152d6
...
...
@@ -12,12 +12,6 @@ API reference guide
This document contains details of the APIs for the Composable Kernel (CK) library and introduces
some of the key design principles that are used to write new classes that extend CK functionality.
=================
Using CK API
=================
This section describes how to use the CK library API.
=================
CK Datatypes
=================
...
...
docs/sphinx/requirements.in
View file @
63b152d6
rocm-docs-core==1.8.
1
rocm-docs-core==1.8.
2
sphinxcontrib-bibtex==2.6.3
docs/sphinx/requirements.txt
View file @
63b152d6
...
...
@@ -103,7 +103,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.8.
1
rocm-docs-core==1.8.
2
# via -r requirements.in
six==1.16.0
# via pybtex
...
...
example/01_gemm/common.hpp
View file @
63b152d6
...
...
@@ -21,6 +21,7 @@
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
struct
ProblemSize
final
{
...
...
@@ -28,9 +29,9 @@ struct ProblemSize final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
};
struct
ProblemSizeStreamK
final
...
...
@@ -39,9 +40,9 @@ struct ProblemSizeStreamK final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
NumSKBlocks
=
-
1
;
};
...
...
@@ -51,9 +52,9 @@ struct ProblemSizeStreamK_universal final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
Grid_size
=
-
1
;
// defaults to max occupancy
ck
::
index_t
Streamk_sel
=
1
;
// defaults to 1-tile SK
...
...
@@ -65,9 +66,9 @@ struct ProblemSizeSplitK final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
KBatch
=
1
;
};
...
...
@@ -125,7 +126,7 @@ bool parse_cmd_args<ProblemSize>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
@@ -175,7 +176,7 @@ bool parse_cmd_args<ProblemSizeStreamK_universal>(int argc,
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
<<
"arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC"
<<
std
::
endl
...
...
@@ -224,7 +225,7 @@ bool parse_cmd_args<ProblemSizeStreamK>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
@@ -274,7 +275,7 @@ bool parse_cmd_args<ProblemSizeSplitK>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
example/01_gemm/gemm_dl_fp16.cpp
View file @
63b152d6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_dl_fp32.cpp
View file @
63b152d6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_dl_int8.cpp
View file @
63b152d6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
Prev
1
2
3
4
5
…
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment