Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6a25d081
Commit
6a25d081
authored
Oct 09, 2024
by
carlushuang
Browse files
Merge remote-tracking branch 'origin/develop' into ck_tile/fav2_fwd_sept
parents
02f8c487
ceaed8e0
Changes
73
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
240 additions
and
103 deletions
+240
-103
CMakeLists.txt
CMakeLists.txt
+44
-63
Jenkinsfile
Jenkinsfile
+4
-4
README.md
README.md
+6
-5
cmake/Embed.cmake
cmake/Embed.cmake
+3
-1
codegen/CMakeLists.txt
codegen/CMakeLists.txt
+8
-1
codegen/test/CMakeLists.txt
codegen/test/CMakeLists.txt
+2
-1
example/01_gemm/common.hpp
example/01_gemm/common.hpp
+17
-16
example/01_gemm/gemm_dl_fp16.cpp
example/01_gemm/gemm_dl_fp16.cpp
+12
-1
example/01_gemm/gemm_dl_fp32.cpp
example/01_gemm/gemm_dl_fp32.cpp
+12
-1
example/01_gemm/gemm_dl_int8.cpp
example/01_gemm/gemm_dl_int8.cpp
+12
-1
example/01_gemm/gemm_dpp_fp16.cpp
example/01_gemm/gemm_dpp_fp16.cpp
+4
-1
example/01_gemm/gemm_wmma_fp16.cpp
example/01_gemm/gemm_wmma_fp16.cpp
+12
-1
example/01_gemm/gemm_xdl_bf16.cpp
example/01_gemm/gemm_xdl_bf16.cpp
+15
-1
example/01_gemm/gemm_xdl_bf16_rtn.cpp
example/01_gemm/gemm_xdl_bf16_rtn.cpp
+15
-1
example/01_gemm/gemm_xdl_fp16.cpp
example/01_gemm/gemm_xdl_fp16.cpp
+12
-1
example/01_gemm/gemm_xdl_fp16_fp8.cpp
example/01_gemm/gemm_xdl_fp16_fp8.cpp
+12
-1
example/01_gemm/gemm_xdl_fp16_v2.cpp
example/01_gemm/gemm_xdl_fp16_v2.cpp
+12
-1
example/01_gemm/gemm_xdl_fp64.cpp
example/01_gemm/gemm_xdl_fp64.cpp
+12
-1
example/01_gemm/gemm_xdl_fp8.cpp
example/01_gemm/gemm_xdl_fp8.cpp
+14
-0
example/01_gemm/gemm_xdl_fp8_bf8.cpp
example/01_gemm/gemm_xdl_fp8_bf8.cpp
+12
-1
No files found.
CMakeLists.txt
View file @
6a25d081
...
...
@@ -97,10 +97,9 @@ if(DL_KERNELS)
add_definitions
(
-DDL_KERNELS
)
set
(
CK_ENABLE_DL_KERNELS
"ON"
)
endif
()
if
(
INSTANCES_ONLY
)
add_definitions
(
-DINSTANCES_ONLY
)
set
(
CK_ENABLE_INSTANCES_ONLY
"ON"
)
option
(
CK_USE_CODEGEN
"Enable codegen library"
OFF
)
if
(
CK_USE_CODEGEN
)
add_definitions
(
-DCK_USE_CODEGEN
)
endif
()
include
(
getopt
)
...
...
@@ -127,6 +126,12 @@ rocm_setup_version(VERSION ${version})
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
"$ENV{ROCM_PATH}"
"$ENV{HIP_PATH}"
)
message
(
"GPU_TARGETS=
${
GPU_TARGETS
}
"
)
message
(
"GPU_ARCHS=
${
GPU_ARCHS
}
"
)
if
(
GPU_ARCHS
)
#disable GPU_TARGETS to avoid conflicts, this needs to happen before we call hip package
unset
(
GPU_TARGETS CACHE
)
unset
(
AMDGPU_TARGETS CACHE
)
endif
()
find_package
(
hip
)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
...
...
@@ -135,55 +140,38 @@ math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR})
message
(
"hip_version_flat=
${
hip_VERSION_FLAT
}
"
)
message
(
"checking which targets are supported"
)
#This is the list of targets to be used in case GPU_TARGETS is not set on command line
#These targets will be filtered and only supported ones will be used
#Setting GPU_TARGETS on command line will override this list
if
(
NOT PROFILER_ONLY
)
if
(
NOT ENABLE_ASAN_PACKAGING
)
#build CK for all supported targets
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
)
else
()
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
)
endif
()
#In order to build just the CK library (without tests and examples) for all supported GPU targets
#use -D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
#the GPU_TARGETS flag will be reset in this case in order to avoid conflicts.
#
#In order to build CK along with all tests and examples it should be OK to set GPU_TARGETS to just 1 or 2 similar architectures.
if
(
NOT ENABLE_ASAN_PACKAGING
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
set
(
CK_GPU_TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
)
else
()
#build CK only for xnack-supported targets
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS
TARGETS
"gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+"
)
set
(
GPU_TARGETS
"
${
DEFAULT_GPU_TARGETS
}
"
CACHE STRING
" "
FORCE
)
set
(
CK_GPU_TARGETS
"gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
)
endif
()
else
()
add_definitions
(
-DPROFILER_ONLY
)
set
(
GPU_TARGETS
""
CACHE STRING
""
FORCE
)
#build CK only for xnack-supported targets when using ASAN
set
(
CK_GPU_TARGETS
"gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+"
)
endif
()
#if user set GPU_ARCHS on the cmake command line, overwrite default target list with user's list
#otherwise, if user set GPU_TARGETS, use that set of targets
if
(
GPU_ARCHS
)
set
(
CK_GPU_TARGETS
${
GPU_ARCHS
}
)
else
()
if
(
GPU_TARGETS
)
message
(
FATAL_ERROR
"For PROFILE_ONLY build, please do not set GPU_TARGETS, use GPU_ARCH = gfx90, gfx94, gfx10, gfx11 or gfx12"
)
endif
()
if
(
GPU_ARCH MATCHES
"gfx90"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx908;gfx90a"
)
elseif
(
GPU_ARCH MATCHES
"gfx94"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx940;gfx941;gfx942"
)
elseif
(
GPU_ARCH MATCHES
"gfx10"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1030"
)
elseif
(
GPU_ARCH MATCHES
"gfx11"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1100;gfx1101;gfx1102"
)
elseif
(
GPU_ARCH MATCHES
"gfx12"
)
rocm_check_target_ids
(
DEFAULT_GPU_TARGETS TARGETS
"gfx1200;gfx1201"
)
else
()
message
(
FATAL_ERROR
"For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12"
)
set
(
CK_GPU_TARGETS
${
GPU_TARGETS
}
)
endif
()
set
(
GPU_TARGETS
"
${
DEFAULT_GPU_TARGETS
}
"
CACHE STRING
" "
FORCE
)
endif
()
message
(
"Supported GPU_TARGETS=
${
DEFAULT_GPU_TARGETS
}
"
)
#make sure all the targets on the list are actually supported by the current compiler
rocm_check_target_ids
(
SUPPORTED_GPU_TARGETS
TARGETS
${
CK_GPU_TARGETS
}
)
if
(
GPU_TARGETS
)
message
(
"Building CK for the following targets:
${
GPU_TARGETS
}
"
)
else
()
message
(
"Building CK for the default targets:
${
DEFAULT_GPU_TARGETS
}
"
)
endif
()
message
(
"Building CK for the following targets:
${
SUPPORTED_GPU_TARGETS
}
"
)
if
(
GPU_TARGETS
)
if
(
GPU_TARGETS MATCHES
"gfx9"
)
...
...
@@ -557,8 +545,7 @@ ENDFOREACH()
add_custom_target
(
instances DEPENDS utility;
${
CK_DEVICE_INSTANCES
}
SOURCES
${
INSTANCE_FILES
}
)
add_subdirectory
(
library
)
if
(
NOT DEFINED INSTANCES_ONLY
)
if
(
NOT DEFINED PROFILER_ONLY
)
if
(
NOT GPU_ARCHS
)
rocm_package_setup_component
(
tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests
# Prevent -static suffix on package name
...
...
@@ -569,24 +556,18 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples
)
add_subdirectory
(
example
)
add_subdirectory
(
test
)
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory
(
profiler
)
else
()
#When building PROFILER_ONLY, label the package with GPU_ARCH
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler_
${
GPU_ARCH
}
)
add_subdirectory
(
profiler
)
endif
()
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
endif
()
if
(
NOT DEFINED PROFILER_ONLY
AND
(
GPU_TARGETS MATCHES
"gfx9"
OR DEFINED INSTANCES_ONLY
))
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
PACKAGE_NAME ckprofiler
)
add_subdirectory
(
profiler
)
if
(
CK_USE_CODEGEN
AND
(
GPU_TARGETS MATCHES
"gfx9"
OR GPU_ARCHS
))
add_subdirectory
(
codegen
)
endif
()
...
...
Jenkinsfile
View file @
6a25d081
...
...
@@ -320,7 +320,7 @@ def cmake_build(Map conf=[:]){
if
(
package_build
==
true
&&
(
env
.
BRANCH_NAME
==
"develop"
||
env
.
BRANCH_NAME
==
"amd-master"
))
{
archiveArtifacts
artifacts:
"build/*.deb"
,
allowEmptyArchive:
true
,
fingerprint:
true
}
if
(
params
.
RUN_CK_TILE_TESTS
){
if
(
params
.
RUN_CK_TILE_
FMHA_
TESTS
){
try
{
archiveArtifacts
"perf_fmha_fwd_*.log"
archiveArtifacts
"perf_fmha_bwd_*.log"
...
...
@@ -682,7 +682,7 @@ def process_results(Map conf=[:]){
timeout
(
time:
1
,
unit:
'HOURS'
){
try
{
dir
(
"script"
){
if
(
params
.
RUN_CK_TILE_TESTS
){
if
(
params
.
RUN_CK_TILE_
FMHA_
TESTS
){
try
{
unstash
"perf_fmha_fwd_gfx942.log"
unstash
"perf_fmha_bwd_gfx942.log"
...
...
@@ -1138,8 +1138,8 @@ pipeline {
execute_args
=
""" cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER="${build_compiler()}" \
-D CMAKE_BUILD_TYPE=Release \
-D
INSTANCES_ONLY=ON
\
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
-D
GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
\
-D
CMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
}
steps
{
buildHipClangJobAndReboot
(
setup_cmd:
""
,
build_cmd:
""
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
...
...
README.md
View file @
6a25d081
...
...
@@ -90,7 +90,12 @@ Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composa
```
If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets
supported by the current compiler (this may take a long time).
supported by the current compiler (this may take a long time).
NOTE: If you try setting `GPU_TARGETS` to a list of architectures, the build will only work if the
architectures are similar, e.g., `gfx908;gfx90a`, or `gfx1100;gfx1101;gfx1102`. Otherwise, if you
want to build the library for a list of different architectures,
you should use the `GPU_ARCHS` build argument, for example `GPU_ARCHS=gfx908;gfx1030;gfx1100;gfx942`.
4.
Build the entire CK library:
...
...
@@ -137,10 +142,6 @@ crash. In such cases, you can reduce the number of threads to 32 by using `-j32`
Additional cmake flags can be used to significantly speed-up the build:
*
`INSTANCES_ONLY`
(default is OFF) must be set to ON in order to build only the instances and library
while skipping all tests, examples, and profiler. This is useful in cases when you plan to use CK as a
dependency and don't plan to run any examples or tests.
*
`DTYPES`
(default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build
instances of select data types only. The main default data types are fp32 and fp16; you can safely skip
other data types.
...
...
cmake/Embed.cmake
View file @
6a25d081
...
...
@@ -233,6 +233,8 @@ function(add_embed_library EMBED_NAME)
else
()
target_sources
(
${
EMBED_NAME
}
INTERFACE $<TARGET_OBJECTS:
${
INTERNAL_EMBED_LIB
}
>
)
endif
()
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
"
${
EMBED_DIR
}
/include"
)
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
$<BUILD_INTERFACE:
${
EMBED_DIR
}
/include>
$<INSTALL_INTERFACE:include/ck>
)
endfunction
()
codegen/CMakeLists.txt
View file @
6a25d081
...
...
@@ -39,6 +39,7 @@ set_target_properties(ck_host PROPERTIES
target_include_directories
(
ck_host PUBLIC
$<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
$<INSTALL_INTERFACE:include>
)
add_executable
(
ck-template-driver driver/main.cpp
)
...
...
@@ -48,6 +49,12 @@ rocm_install(
TARGETS ck_host ck_headers
EXPORT ck_hostTargets
)
rocm_install
(
EXPORT ck_hostTargets
FILE composable_kernelck_hostTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
rocm_install
(
DIRECTORY include/ck DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
add_subdirectory
(
test
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
codegen/test/CMakeLists.txt
View file @
6a25d081
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
add_subdirectory
(
rtc
)
file
(
GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp
)
if
(
NOT INSTANCES_ONLY
)
# do not build the tests when we build the library for various targets
if
(
NOT GPU_ARCHS
)
foreach
(
TEST_SRC
${
TEST_SRCS
}
)
set_source_files_properties
(
${
TEST_SRC
}
PROPERTIES LANGUAGE HIP
)
get_filename_component
(
BASE_NAME
${
TEST_SRC
}
NAME_WE
)
...
...
example/01_gemm/common.hpp
View file @
6a25d081
...
...
@@ -21,6 +21,7 @@
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
struct
ProblemSize
final
{
...
...
@@ -28,9 +29,9 @@ struct ProblemSize final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
};
struct
ProblemSizeStreamK
final
...
...
@@ -39,9 +40,9 @@ struct ProblemSizeStreamK final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
NumSKBlocks
=
-
1
;
};
...
...
@@ -51,9 +52,9 @@ struct ProblemSizeStreamK_universal final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
Grid_size
=
-
1
;
// defaults to max occupancy
ck
::
index_t
Streamk_sel
=
1
;
// defaults to 1-tile SK
...
...
@@ -65,9 +66,9 @@ struct ProblemSizeSplitK final
ck
::
index_t
N
=
4096
;
ck
::
index_t
K
=
4096
;
ck
::
index_t
StrideA
=
4096
;
ck
::
index_t
StrideB
=
4096
;
ck
::
index_t
StrideC
=
4096
;
ck
::
index_t
StrideA
=
0
;
ck
::
index_t
StrideB
=
0
;
ck
::
index_t
StrideC
=
0
;
ck
::
index_t
KBatch
=
1
;
};
...
...
@@ -125,7 +126,7 @@ bool parse_cmd_args<ProblemSize>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
@@ -175,7 +176,7 @@ bool parse_cmd_args<ProblemSizeStreamK_universal>(int argc,
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
<<
"arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC"
<<
std
::
endl
...
...
@@ -224,7 +225,7 @@ bool parse_cmd_args<ProblemSizeStreamK>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
@@ -274,7 +275,7 @@ bool parse_cmd_args<ProblemSizeSplitK>(int argc,
}
else
{
std
::
cerr
<<
"arg1: verification (0=no, 1=
yes
)"
<<
std
::
endl
std
::
cerr
<<
"arg1: verification (0=no, 1=
CPU and GPU
)"
<<
std
::
endl
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<<
std
::
endl
<<
"arg3: time kernel (0=no, 1=yes)"
<<
std
::
endl
...
...
example/01_gemm/gemm_dl_fp16.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_dl_fp32.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_dl_int8.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -32,6 +32,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_dpp_fp16.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -34,6 +34,9 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDpp
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_wmma_fp16.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -68,6 +68,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_bf16.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -33,6 +33,20 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceComputeType
=
float
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
,
ReferenceComputeType
,
ReferenceComputeType
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_bf16_rtn.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -34,6 +34,20 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceComputeType
=
float
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
,
ReferenceComputeType
,
ReferenceComputeType
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp16.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -47,6 +47,17 @@ using DeviceGemmInstance = DeviceGemmInstance1;
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp16_fp8.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -42,6 +42,17 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
CElementOp
,
ComputeType
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp16_v2.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -46,6 +46,17 @@ using DeviceGemmInstance =
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp64.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -41,6 +41,17 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdl
BElementOp
,
CElementOp
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp8.cpp
View file @
6a25d081
...
...
@@ -37,6 +37,20 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
using
ReferenceComputeType
=
float
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
,
ReferenceComputeType
,
ReferenceComputeType
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
example/01_gemm/gemm_xdl_fp8_bf8.cpp
View file @
6a25d081
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
...
...
@@ -44,6 +44,17 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
ComputeTypeA
,
ComputeTypeB
>
;
using
ReferenceGemmInstanceGPU
=
ck
::
tensor_operation
::
device
::
ReferenceGemm
<
ALayout
,
BLayout
,
CLayout
,
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
AElementOp
,
BElementOp
,
CElementOp
>
;
#include "run_gemm_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_gemm_example
(
argc
,
argv
);
}
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment