Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
0ab48d62
Commit
0ab48d62
authored
Oct 15, 2024
by
Mirza Halilcevic
Browse files
Merge remote-tracking branch 'upstream/develop' into ck_host_lib
parents
837f644f
10158b0f
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
595 additions
and
90 deletions
+595
-90
Jenkinsfile
Jenkinsfile
+38
-11
codegen/CMakeLists.txt
codegen/CMakeLists.txt
+29
-46
codegen/test/CMakeLists.txt
codegen/test/CMakeLists.txt
+20
-18
codegen/test/include/common.hpp
codegen/test/include/common.hpp
+0
-0
codegen/test/rtc/CMakeLists.txt
codegen/test/rtc/CMakeLists.txt
+2
-0
codegen/test/rtc/include/rtc/compile_kernel.hpp
codegen/test/rtc/include/rtc/compile_kernel.hpp
+2
-2
codegen/test/rtc/include/rtc/filesystem.hpp
codegen/test/rtc/include/rtc/filesystem.hpp
+60
-0
codegen/test/rtc/include/rtc/tmp_dir.hpp
codegen/test/rtc/include/rtc/tmp_dir.hpp
+2
-2
codegen/test/rtc/src/compile_kernel.cpp
codegen/test/rtc/src/compile_kernel.cpp
+5
-5
codegen/test/rtc/src/tmp_dir.cpp
codegen/test/rtc/src/tmp_dir.cpp
+3
-3
example/ck_tile/03_gemm/gemm_basic.cpp
example/ck_tile/03_gemm/gemm_basic.cpp
+5
-2
include/ck_tile/core/config.hpp
include/ck_tile/core/config.hpp
+4
-1
include/ck_tile/ops/gemm.hpp
include/ck_tile/ops/gemm.hpp
+1
-0
include/ck_tile/ops/gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp
...gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp
+424
-0
No files found.
Jenkinsfile
View file @
0ab48d62
...
...
@@ -735,11 +735,11 @@ def process_results(Map conf=[:]){
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
CRON_SETTINGS
=
BRANCH_NAME
==
"develop"
?
'''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.2;RUN_CK_TILE_FMHA_TESTS=true;RUN_CK_TILE_GEMM_TESTS=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
;RUN_CODEGEN_TESTS=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_
CODEGEN_TESTS=false;RUN_
PERFORMANCE_TESTS=false;USE_SCCACHE=false
0 13 * * * % BUILD_LEGACY_OS=true
'''
:
""
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false
0 13 * * * % BUILD_LEGACY_OS=true'''
:
""
pipeline
{
agent
none
...
...
@@ -806,6 +806,10 @@ pipeline {
name:
"RUN_GROUPED_CONV_LARGE_CASES_TESTS"
,
defaultValue:
false
,
description:
"Run the grouped conv large cases tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CODEGEN_TESTS"
,
defaultValue:
false
,
description:
"Run codegen tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CK_TILE_FMHA_TESTS"
,
defaultValue:
false
,
...
...
@@ -926,7 +930,30 @@ pipeline {
execute_args
=
""" ../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 test_grouped_convnd_fwd_large_cases_xdl && \
./bin/test_grouped_convnd_fwd_large_cases_xdl"""
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
}
}
stage
(
"Run Codegen Tests"
)
{
parallel
{
stage
(
"Run Codegen Tests on gfx90a"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_CODEGEN_TESTS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
execute_args
=
""" CXX=/opt/rocm/llvm/bin/clang++ cmake ../codegen && \
make -j64 check"""
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -951,7 +978,7 @@ pipeline {
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ &&
example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -970,7 +997,7 @@ pipeline {
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ &&
example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -995,7 +1022,7 @@ pipeline {
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -1014,7 +1041,7 @@ pipeline {
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
@@ -1040,7 +1067,7 @@ pipeline {
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
...
...
@@ -1059,7 +1086,7 @@ pipeline {
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
...
...
@@ -1140,7 +1167,7 @@ pipeline {
-D CMAKE_BUILD_TYPE=Release \
-D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102" \
-D CMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
}
}
steps
{
buildHipClangJobAndReboot
(
setup_cmd:
""
,
build_cmd:
""
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
...
...
codegen/CMakeLists.txt
View file @
0ab48d62
cmake_minimum_required
(
VERSION 3.16
)
project
(
composable_kernel_host
)
set
(
CMAKE_EXPORT_COMPILE_COMMANDS ON
)
set
(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${
CMAKE_BINARY_DIR
}
/lib
)
...
...
@@ -5,71 +8,51 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_BINARY_DIR
}
/bin
)
set
(
CK_ROOT
${
CMAKE_CURRENT_SOURCE_DIR
}
/..
)
add_compile_options
(
-std=c++17
)
f
in
d_package
(
hip
)
add_custom_target
(
codegen
)
find_package
(
ROCM
)
in
clude
(
ROCMInstallTargets
)
include
(
ROCMTest
)
# add include directories
include_directories
(
BEFORE
${
PROJECT_BINARY_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/library/include
${
HIP_INCLUDE_DIRS
}
)
rocm_setup_version
(
VERSION 1.0
)
list
(
APPEND CMAKE_MODULE_PATH
${
CK_ROOT
}
/cmake
)
include
(
Embed
)
file
(
GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
${
CK_ROOT
}
/include/ck/*.hpp
)
# printouts fo
r
debug purposes
#message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
#message(STATUS "RELATIVE: ${CK_ROOT}/include")
${
CK_ROOT
}
/include/ck/*.hpp
)
# printouts fo
t
debug purposes
#
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
#
message(STATUS "RELATIVE: ${CK_ROOT}/include")
add_embed_library
(
ck_headers
${
KERNEL_FILES
}
RELATIVE
${
CK_ROOT
}
/include
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
add_compile_options
(
-std=c++17
)
#message(STATUS "SOURCE_FILES: ${SOURCES}"
)
file
(
GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp
)
# TODO: Use object library
add_library
(
ck_host STATIC
${
SOURCES
}
)
add_library
(
composable_kernel::ck_host ALIAS ck_host
)
target_link_libraries
(
ck_host PRIVATE ck_headers
)
set_target_properties
(
ck_host PROPERTIES
LINKER_LANGUAGE CXX
POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
ck_host PROPERTIES
LINKER_LANGUAGE CXX
POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
ck_host SYSTEM PRIVATE
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_BINARY_DIR
}
/solution_instances>
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_BINARY_DIR
}
/embed/ck_headers/include>
)
target_link_libraries
(
ck_host PRIVATE $<BUILD_INTERFACE:ck_headers>
)
target_include_directories
(
ck_host PUBLIC
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
$<INSTALL_INTERFACE:include>
)
# target_include_directories(ck_host PUBLIC
# $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
# )
add_executable
(
ck-template-driver driver/main.cpp
)
target_link_libraries
(
ck-template-driver ck_host
)
rocm_install
(
TARGETS ck_host
EXPORT ck_hostTargets
rocm_install_targets
(
TARGETS ck_host ck_headers
EXPORT ck_host_targets
INCLUDE include
PRIVATE
)
rocm_install
(
EXPORT ck_hostTargets
FILE composable_kernelck_hostTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
rocm_install
(
DIRECTORY include/ck DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
rocm_install
(
EXPORT ck_hostTargets
FILE composable_kernelck_hostTargets.cmake
rocm_export_targets
(
EXPORT ck_host_targets
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
if
(
NOT CK_BUILD_HOST_LIB
AND BUILD_TESTING
)
if
(
BUILD_TESTING AND
NOT CK_BUILD_HOST_LIB
)
add_subdirectory
(
test
)
endif
()
codegen/test/CMakeLists.txt
View file @
0ab48d62
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
add_subdirectory
(
rtc
)
file
(
GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp
)
# do not build the tests when we build the library for various targets
if
(
NOT GPU_ARCHS
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
set_source_files_properties
(
${
TEST_SRC
}
PROPERTIES LANGUAGE HIP
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
add_executable
(
codegen_test_
${
BASE_NAME
}
${
TEST_SRC
}
)
if
(
CK_USE_ALTERNATIVE_PYTHON
)
target_link_options
(
codegen_test_
${
BASE_NAME
}
PRIVATE -lstdc++fs
)
endif
()
add_dependencies
(
codegen codegen_test_
${
BASE_NAME
}
)
add_dependencies
(
tests codegen_test_
${
BASE_NAME
}
)
add_dependencies
(
check codegen_test_
${
BASE_NAME
}
)
add_test
(
NAME codegen_test_
${
BASE_NAME
}
COMMAND codegen_test_
${
BASE_NAME
}
)
message
(
"adding test codegen_test_
${
BASE_NAME
}
"
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
ck_rtc ck_host
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/codegen/test/include
)
# TODO: These tests need to be refactored to remove dependency on main ck
# headers and device compilation.
set
(
TESTS_REQUIRE_DEVICE_COMPILE
grouped_conv_fwd_multiple_d_v1
grouped_conv_fwd_multiple_d_v2
grouped_conv_fwd_multiple_d_v3
grouped_conv_fwd_multiple_d_v4
)
find_package
(
hip
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
rocm_add_test_executable
(
codegen_test_
${
BASE_NAME
}
${
TEST_SRC
}
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
ck_rtc ck_host
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC include
)
if
(
BASE_NAME IN_LIST TESTS_REQUIRE_DEVICE_COMPILE
)
target_link_libraries
(
codegen_test_
${
BASE_NAME
}
hip::device
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/include
)
target_include_directories
(
codegen_test_
${
BASE_NAME
}
PUBLIC
${
CK_ROOT
}
/library/include
)
endf
oreach
()
end
i
f
()
end
i
f
()
endf
oreach
()
codegen/test/common.hpp
→
codegen/test/
include/
common.hpp
View file @
0ab48d62
File moved
codegen/test/rtc/CMakeLists.txt
View file @
0ab48d62
find_package
(
hip
)
file
(
GLOB RTC_SOURCES CONFIGURE_DEPENDS src/*.cpp
)
add_library
(
ck_rtc
${
RTC_SOURCES
}
)
target_include_directories
(
ck_rtc PUBLIC include
)
target_link_libraries
(
ck_rtc PUBLIC hip::host
)
target_link_libraries
(
ck_rtc PUBLIC -lstdc++fs
)
codegen/test/rtc/include/rtc/compile_kernel.hpp
View file @
0ab48d62
...
...
@@ -2,14 +2,14 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#include <rtc/kernel.hpp>
#include <c
k
/filesystem.hpp>
#include <
rt
c/filesystem.hpp>
#include <string>
namespace
rtc
{
struct
src_file
{
CK
::
fs
::
path
path
;
fs
::
path
path
;
std
::
string_view
content
;
};
...
...
codegen/test/rtc/include/rtc/filesystem.hpp
0 → 100644
View file @
0ab48d62
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#ifndef GUARD_TEST_HOST_RTC_FILESYSTEM_HPP
#define GUARD_TEST_HOST_RTC_FILESYSTEM_HPP
#include <string>
#include <string_view>
// clang-format off
#if defined(CPPCHECK)
#define RTC_HAS_FILESYSTEM 1
#define RTC_HAS_FILESYSTEM_TS 1
#elif defined(_WIN32)
#if _MSC_VER >= 1920
#define RTC_HAS_FILESYSTEM 1
#define RTC_HAS_FILESYSTEM_TS 0
#elif _MSC_VER >= 1900
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 1
#else
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 0
#endif
#elif defined(__has_include)
#if __has_include(<filesystem>) && __cplusplus >= 201703L
#define RTC_HAS_FILESYSTEM 1
#else
#define RTC_HAS_FILESYSTEM 0
#endif
#if __has_include(<experimental/filesystem>) && __cplusplus >= 201103L
#define RTC_HAS_FILESYSTEM_TS 1
#else
#define RTC_HAS_FILESYSTEM_TS 0
#endif
#else
#define RTC_HAS_FILESYSTEM 0
#define RTC_HAS_FILESYSTEM_TS 0
#endif
// clang-format on
#if RTC_HAS_FILESYSTEM
#include <filesystem>
#elif RTC_HAS_FILESYSTEM_TS
#include <experimental/filesystem>
#else
#error "No filesystem include available"
#endif
namespace
rtc
{
#if RTC_HAS_FILESYSTEM
namespace
fs
=
::
std
::
filesystem
;
#elif RTC_HAS_FILESYSTEM_TS
namespace
fs
=
::
std
::
experimental
::
filesystem
;
#endif
}
// namespace rtc
#endif // GUARD_RTC_FILESYSTEM_HPP_
codegen/test/rtc/include/rtc/tmp_dir.hpp
View file @
0ab48d62
...
...
@@ -2,13 +2,13 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_TMP_DIR
#include <string>
#include <c
k
/filesystem.hpp>
#include <
rt
c/filesystem.hpp>
namespace
rtc
{
struct
tmp_dir
{
CK
::
fs
::
path
path
;
fs
::
path
path
;
tmp_dir
(
const
std
::
string
&
prefix
=
""
);
void
execute
(
const
std
::
string
&
cmd
)
const
;
...
...
codegen/test/rtc/src/compile_kernel.cpp
View file @
0ab48d62
#include
"
rtc/hip.hpp
"
#include
<
rtc/hip.hpp
>
#include <rtc/compile_kernel.hpp>
#include <rtc/tmp_dir.hpp>
#include <stdexcept>
...
...
@@ -70,9 +70,9 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
for
(
const
auto
&
src
:
srcs
)
{
CK
::
fs
::
path
full_path
=
td
.
path
/
src
.
path
;
CK
::
fs
::
path
parent_path
=
full_path
.
parent_path
();
CK
::
fs
::
create_directories
(
parent_path
);
fs
::
path
full_path
=
td
.
path
/
src
.
path
;
fs
::
path
parent_path
=
full_path
.
parent_path
();
fs
::
create_directories
(
parent_path
);
write_string
(
full_path
.
string
(),
src
.
content
);
if
(
src
.
path
.
extension
().
string
()
==
".cpp"
)
{
...
...
@@ -86,7 +86,7 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
td
.
execute
(
compiler
()
+
options
.
flags
);
auto
out_path
=
td
.
path
/
out
;
if
(
not
CK
::
fs
::
exists
(
out_path
))
if
(
not
fs
::
exists
(
out_path
))
throw
std
::
runtime_error
(
"Output file missing: "
+
out
);
auto
obj
=
read_buffer
(
out_path
.
string
());
...
...
codegen/test/rtc/src/tmp_dir.cpp
View file @
0ab48d62
...
...
@@ -31,10 +31,10 @@ std::string unique_string(const std::string& prefix)
}
tmp_dir
::
tmp_dir
(
const
std
::
string
&
prefix
)
:
path
(
CK
::
fs
::
temp_directory_path
()
/
:
path
(
fs
::
temp_directory_path
()
/
unique_string
(
prefix
.
empty
()
?
"ck-rtc"
:
"ck-rtc-"
+
prefix
))
{
CK
::
fs
::
create_directories
(
this
->
path
);
fs
::
create_directories
(
this
->
path
);
}
void
tmp_dir
::
execute
(
const
std
::
string
&
cmd
)
const
...
...
@@ -43,6 +43,6 @@ void tmp_dir::execute(const std::string& cmd) const
std
::
system
(
s
.
c_str
());
}
tmp_dir
::~
tmp_dir
()
{
CK
::
fs
::
remove_all
(
this
->
path
);
}
tmp_dir
::~
tmp_dir
()
{
fs
::
remove_all
(
this
->
path
);
}
}
// namespace rtc
example/ck_tile/03_gemm/gemm_basic.cpp
View file @
0ab48d62
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -282,7 +281,11 @@ int main(int argc, char* argv[])
using
CodegenPipelineProblem
=
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
CodegenGemmShape
,
CodegenGemmTraits
>
;
using
CodegenGemmPipeline
=
ck_tile
::
GemmPipelineAGmemBGmemCRegV1
<
CodegenPipelineProblem
>
;
using
CodegenGemmPolicy
=
ck_tile
::
UniversalGemmPipelineAgBgCrPolicy
<
matrix_a_layout
,
matrix_b_layout
,
matrix_c_layout
>
;
using
CodegenGemmPipeline
=
ck_tile
::
GemmPipelineAGmemBGmemCRegV1
<
CodegenPipelineProblem
,
CodegenGemmPolicy
>
;
invoke_gemm
<
ck_tile
::
half_t
,
matrix_a_layout
,
...
...
include/ck_tile/core/config.hpp
View file @
0ab48d62
...
...
@@ -157,8 +157,11 @@
#endif
#endif
// workaround for ROCm 6.2 and later
#ifndef CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE
#if HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133
#if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133) || \
(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 3 && HIP_VERSION_PATCH >= 42131) || \
(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR > 3)
#define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 1
#else
#define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 0
...
...
include/ck_tile/ops/gemm.hpp
View file @
0ab48d62
...
...
@@ -23,6 +23,7 @@
#include "ck_tile/ops/gemm/block/block_gemm_problem.hpp"
#include "ck_tile/ops/gemm/kernel/gemm_kernel.hpp"
#include "ck_tile/ops/gemm/kernel/gemm_tile_partitioner.hpp"
#include "ck_tile/ops/gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp"
#include "ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp"
#include "ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1_default_policy.hpp"
#include "ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v2.hpp"
...
...
include/ck_tile/ops/gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp
0 → 100644
View file @
0ab48d62
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment