Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cba8f7f2
Commit
cba8f7f2
authored
Jun 26, 2022
by
Anthony Chang
Browse files
Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4
parents
cc50b687
b653c5eb
Changes
583
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
272 additions
and
203 deletions
+272
-203
.gitignore
.gitignore
+1
-1
CMakeLists.txt
CMakeLists.txt
+20
-9
Dockerfile
Dockerfile
+5
-0
Jenkinsfile
Jenkinsfile
+17
-17
LICENSE
LICENSE
+28
-0
README.md
README.md
+5
-2
cmake/googletest.cmake
cmake/googletest.cmake
+6
-2
example/01_gemm/gemm_dl_fp16.cpp
example/01_gemm/gemm_dl_fp16.cpp
+14
-13
example/01_gemm/gemm_dl_fp32.cpp
example/01_gemm/gemm_dl_fp32.cpp
+14
-13
example/01_gemm/gemm_dl_int8.cpp
example/01_gemm/gemm_dl_int8.cpp
+14
-13
example/01_gemm/gemm_xdl_bf16.cpp
example/01_gemm/gemm_xdl_bf16.cpp
+14
-13
example/01_gemm/gemm_xdl_fp16.cpp
example/01_gemm/gemm_xdl_fp16.cpp
+14
-13
example/01_gemm/gemm_xdl_fp64.cpp
example/01_gemm/gemm_xdl_fp64.cpp
+15
-14
example/01_gemm/gemm_xdl_int8.cpp
example/01_gemm/gemm_xdl_int8.cpp
+15
-13
example/02_gemm_alpha_beta/gemm_xdl_alpha_beta.cpp
example/02_gemm_alpha_beta/gemm_xdl_alpha_beta.cpp
+14
-15
example/03_gemm_bias_relu/gemm_xdl_bias_relu.cpp
example/03_gemm_bias_relu/gemm_xdl_bias_relu.cpp
+15
-12
example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp
..._gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp
+15
-12
example/06_conv2d_fwd_bias_relu/conv2d_fwd_xdl_bias_relu.cpp
example/06_conv2d_fwd_bias_relu/conv2d_fwd_xdl_bias_relu.cpp
+15
-14
example/07_conv2d_fwd_bias_relu_add/conv2d_fwd_xdl_bias_relu_add.cpp
...conv2d_fwd_bias_relu_add/conv2d_fwd_xdl_bias_relu_add.cpp
+15
-14
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
+16
-13
No files found.
.gitignore
View file @
cba8f7f2
...
@@ -45,4 +45,4 @@ build*
...
@@ -45,4 +45,4 @@ build*
*~
*~
# GDB temporary files
# GDB temporary files
.gdb_history
.gdb_history
\ No newline at end of file
CMakeLists.txt
View file @
cba8f7f2
...
@@ -7,7 +7,8 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
...
@@ -7,7 +7,8 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing
()
enable_testing
()
find_package
(
ROCM REQUIRED PATHS /opt/rocm
)
set
(
ROCM_SYMLINK_LIBS OFF
)
find_package
(
ROCM 0.8 REQUIRED PATHS /opt/rocm
)
include
(
ROCMInstallTargets
)
include
(
ROCMInstallTargets
)
include
(
ROCMPackageConfigHelpers
)
include
(
ROCMPackageConfigHelpers
)
...
@@ -16,7 +17,7 @@ include(ROCMInstallSymlinks)
...
@@ -16,7 +17,7 @@ include(ROCMInstallSymlinks)
include
(
ROCMCreatePackage
)
include
(
ROCMCreatePackage
)
include
(
CheckCXXCompilerFlag
)
include
(
CheckCXXCompilerFlag
)
rocm_setup_version
(
VERSION
1.0
.0
)
rocm_setup_version
(
VERSION
0.2
.0
)
include
(
TargetFlags
)
include
(
TargetFlags
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
...
@@ -70,7 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
...
@@ -70,7 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif
()
endif
()
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
rocm_create_package
(
rocm_create_package
(
NAME composablekernel
NAME composablekernel
DESCRIPTION
"High Performance Composable Kernel for AMD GPUs"
DESCRIPTION
"High Performance Composable Kernel for AMD GPUs"
...
@@ -78,10 +78,6 @@ rocm_create_package(
...
@@ -78,10 +78,6 @@ rocm_create_package(
LDCONFIG
LDCONFIG
)
)
## half
set
(
HALF_INCLUDE_DIR
"
${
PROJECT_SOURCE_DIR
}
/external/include/half"
)
message
(
"HALF_INCLUDE_DIR:
${
HALF_INCLUDE_DIR
}
"
)
## tidy
## tidy
include
(
EnableCompilerWarnings
)
include
(
EnableCompilerWarnings
)
set
(
CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
set
(
CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
...
@@ -229,7 +225,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
...
@@ -229,7 +225,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
include_directories
(
BEFORE
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_BINARY_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/library/include
${
PROJECT_SOURCE_DIR
}
/library/include
)
)
...
@@ -243,6 +238,11 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
...
@@ -243,6 +238,11 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
rocm_package_setup_component
(
tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests
# Prevent -static suffix on package name
)
add_subdirectory
(
library
)
add_subdirectory
(
library
)
add_subdirectory
(
example
)
add_subdirectory
(
example
)
add_subdirectory
(
test
)
add_subdirectory
(
test
)
...
@@ -264,8 +264,19 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
...
@@ -264,8 +264,19 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
)
install
(
FILES
rocm_
install
(
FILES
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfig.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfig.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfigVersion.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfigVersion.cmake"
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
)
set
(
CPACK_RESOURCE_FILE_LICENSE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/LICENSE"
)
set
(
CPACK_RPM_PACKAGE_LICENSE
"MIT"
)
rocm_create_package
(
NAME composablekernel
DESCRIPTION
"High Performance Composable Kernel for AMD GPUs"
MAINTAINER
"MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
HEADER_ONLY
)
Dockerfile
View file @
cba8f7f2
...
@@ -88,3 +88,8 @@ ADD rbuild.ini /rbuild.ini
...
@@ -88,3 +88,8 @@ ADD rbuild.ini /rbuild.ini
ADD
dev-requirements.txt dev-requirements.txt
ADD
dev-requirements.txt dev-requirements.txt
RUN
rbuild prepare
-s
develop
-d
$PREFIX
RUN
rbuild prepare
-s
develop
-d
$PREFIX
RUN
groupadd
-f
render
RUN
groupadd
-f
render
# Install the new rocm-cmake version
RUN
git clone
-b
master https://github.com/RadeonOpenCompute/rocm-cmake.git
&&
\
cd
rocm-cmake
&&
mkdir
build
&&
cd
build
&&
\
cmake ..
&&
cmake
--build
.
&&
cmake
--build
.
--target
install
Jenkinsfile
View file @
cba8f7f2
...
@@ -379,23 +379,23 @@ pipeline {
...
@@ -379,23 +379,23 @@ pipeline {
}
}
}
}
}
}
stage
(
"Client App"
)
//
stage("Client App")
{
//
{
parallel
//
parallel
{
//
{
stage
(
"Run Client App"
)
//
stage("Run Client App")
{
//
{
agent
{
label
rocmnode
(
"gfx908"
)}
//
agent{ label rocmnode("gfx908")}
environment
{
//
environment{
setup_args
=
""" -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """
//
setup_args = """ -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """
execute_args
=
""" cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """
//
execute_args = """ cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """
}
//
}
steps
{
//
steps{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
config_targets:
"install"
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
,
prefixpath:
'/usr/local'
)
//
buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
}
//
}
}
//
}
}
//
}
}
//
}
stage
(
"Performance Tests"
)
stage
(
"Performance Tests"
)
{
{
parallel
parallel
...
...
LICENSE
0 → 100644
View file @
cba8f7f2
Copyright (c) 2018- , Advanced Micro Devices, Inc. (Chao Liu, Jing Zhang)
Copyright (c) 2019- , Advanced Micro Devices, Inc. (Letao Qin, Qianfeng Zhang, Liang Huang, Shaojie Wang)
Copyright (c) 2022- , Advanced Micro Devices, Inc. (Anthony Chang, Chunyu Lai, Illia Silin, Adam Osewski, Poyen Chen, Jehandad Khan)
Copyright (c) 2019-2021, Advanced Micro Devices, Inc. (Hanwen Chang)
Copyright (c) 2019-2020, Advanced Micro Devices, Inc. (Tejash Shah)
Copyright (c) 2020 , Advanced Micro Devices, Inc. (Xiaoyan Zhou)
Copyright (c) 2021-2022, Advanced Micro Devices, Inc. (Jianfeng Yan)
SPDX-License-Identifier: MIT
Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
View file @
cba8f7f2
...
@@ -6,10 +6,13 @@ docker run \
...
@@ -6,10 +6,13 @@ docker run \
--group-add
sudo
\
--group-add
sudo
\
-w
/root/workspace
\
-w
/root/workspace
\
-v
${
PATH_TO_LOCAL_WORKSPACE
}
:/root/workspace
\
-v
${
PATH_TO_LOCAL_WORKSPACE
}
:/root/workspace
\
rocm/tensorflow:rocm
4.3
.1-tf2.6-dev
\
rocm/tensorflow:rocm
5
.1-tf2.6-dev
\
/bin/bash
/bin/bash
```
```
# Install the new rocm-cmake version
https://github.com/RadeonOpenCompute/rocm-cmake
## Build
## Build
```
bash
```
bash
mkdir
build
&&
cd
build
mkdir
build
&&
cd
build
...
@@ -34,7 +37,7 @@ Instructions for running each individual examples are under ```example/```
...
@@ -34,7 +37,7 @@ Instructions for running each individual examples are under ```example/```
## Tests
## Tests
```
bash
```
bash
make
-j
tests
make
-j
examples
tests
make
test
make
test
```
```
...
...
cmake/googletest.cmake
View file @
cba8f7f2
...
@@ -8,7 +8,7 @@ endif()
...
@@ -8,7 +8,7 @@ endif()
message
(
STATUS
"Fetching GoogleTest"
)
message
(
STATUS
"Fetching GoogleTest"
)
list
(
APPEND GTEST_CMAKE_CXX_FLAGS
list
(
APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef
-Wno-undef
-Wno-reserved-identifier
-Wno-reserved-identifier
-Wno-global-constructors
-Wno-global-constructors
...
@@ -31,7 +31,11 @@ FetchContent_Declare(
...
@@ -31,7 +31,11 @@ FetchContent_Declare(
# Will be necessary for windows build
# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable
(
googletest
)
FetchContent_GetProperties
(
googletest
)
if
(
NOT googletest_POPULATED
)
FetchContent_Populate
(
googletest
)
add_subdirectory
(
${
googletest_SOURCE_DIR
}
${
googletest_BINARY_DIR
}
EXCLUDE_FROM_ALL
)
endif
()
target_compile_options
(
gtest PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gtest PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gtest_main PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gtest_main PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
...
...
example/01_gemm/gemm_dl_fp16.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_dl_fp32.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_dl_int8.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_xdl_bf16.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_xdl_fp16.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "device.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_xdl_fp64.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl.hpp"
#include "device.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/01_gemm/gemm_xdl_int8.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "device.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "gemm_specialization.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/02_gemm_alpha_beta/gemm_xdl_alpha_beta.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "print.hpp"
#include "device.hpp"
#include "ck/library/utility/check_err.hpp"
#include "host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "host_gemm.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp"
#include "device_base.hpp"
#include "device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm_bias_2d.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/03_gemm_bias_relu/gemm_xdl_bias_relu.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include "ck/ck.hpp"
#include "c
heck_err
.hpp"
#include "c
k/tensor_operation/gpu/device/tensor_layout
.hpp"
#include "c
onfig
.hpp"
#include "c
k/tensor_operation/gpu/device/gemm_specialization
.hpp"
#include "
devic
e.hpp"
#include "
ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffl
e.hpp"
#include "
host_tensor
.hpp"
#include "
ck/tensor_operation/gpu/element/element_wise_operation
.hpp"
#include "host_tensor_generator.hpp"
#include "device_
tens
or.hpp"
#include "
ck/library/host_tensor/
device_
mem
or
y
.hpp"
#include "
element_wise_operation
.hpp"
#include "
ck/library/host_tensor/host_tensor
.hpp"
#include "
reference_gemm
.hpp"
#include "
ck/library/host_tensor/host_tensor_generator
.hpp"
#include "
gemm_specialization
.hpp"
#include "
ck/library/reference_tensor_operation/cpu/reference_gemm
.hpp"
#include "
device_gemm_multiple_d_xdl_cshuffle
.hpp"
#include "
ck/library/utility/check_err
.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include "ck/ck.hpp"
#include "c
heck_err
.hpp"
#include "c
k/tensor_operation/gpu/device/tensor_layout
.hpp"
#include "c
onfig
.hpp"
#include "c
k/tensor_operation/gpu/device/gemm_specialization
.hpp"
#include "
devic
e.hpp"
#include "
ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffl
e.hpp"
#include "
host_tensor
.hpp"
#include "
ck/tensor_operation/gpu/element/element_wise_operation
.hpp"
#include "host_tensor_generator.hpp"
#include "device_
tens
or.hpp"
#include "
ck/library/host_tensor/
device_
mem
or
y
.hpp"
#include "
element_wise_operation
.hpp"
#include "
ck/library/host_tensor/host_tensor
.hpp"
#include "
reference_gemm
.hpp"
#include "
ck/library/host_tensor/host_tensor_generator
.hpp"
#include "
gemm_specialization
.hpp"
#include "
ck/library/reference_tensor_operation/cpu/reference_gemm
.hpp"
#include "
device_gemm_multiple_d_xdl_cshuffle
.hpp"
#include "
ck/library/utility/check_err
.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
example/06_conv2d_fwd_bias_relu/conv2d_fwd_xdl_bias_relu.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp"
#include "device.hpp"
#include "ck/library/utility/check_err.hpp"
#include "device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp"
#include "reference_conv_fwd_bias_activation.hpp"
#include "tensor_layout.hpp"
namespace
{
namespace
{
...
...
example/07_conv2d_fwd_bias_relu_add/conv2d_fwd_xdl_bias_relu_add.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp"
#include "ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp"
#include "config.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp"
#include "device.hpp"
#include "ck/library/utility/check_err.hpp"
#include "device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "device_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp"
#include "reference_conv_fwd_bias_activation_add.hpp"
#include "tensor_layout.hpp"
namespace
{
namespace
{
...
...
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <cstdlib>
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <type_traits>
#include <type_traits>
#include "c
heck_err
.hpp"
#include "c
k/ck
.hpp"
#include "c
onfig
.hpp"
#include "c
k/tensor_operation/gpu/device/tensor_layout
.hpp"
#include "c
onv_util
.hpp"
#include "c
k/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk
.hpp"
#include "
device
.hpp"
#include "
ck/tensor_operation/gpu/element/element_wise_operation
.hpp"
#include "device_tensor.hpp"
#include "
device_convnd_fwd_xdl_nhwc_kyxc_nhwk
.hpp"
#include "
ck/library/utility/check_err
.hpp"
#include "
element_wise_operation
.hpp"
#include "
ck/library/utility/conv_util
.hpp"
#include "
host_tensor
.hpp"
#include "
ck/library/host_tensor/device_memory
.hpp"
#include "host_tensor
_generat
or.hpp"
#include "
ck/library/
host_tensor
/host_tens
or.hpp"
#include "
reference_conv_fwd
.hpp"
#include "
ck/library/host_tensor/host_tensor_generator
.hpp"
#include "
tensor_layout
.hpp"
#include "
ck/library/reference_tensor_operation/cpu/reference_conv_fwd
.hpp"
namespace
{
namespace
{
...
@@ -291,8 +294,8 @@ int main(int argc, char* argv[])
...
@@ -291,8 +294,8 @@ int main(int argc, char* argv[])
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
conv
->
GetTypeString
()
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
std
::
endl
;
<<
conv
->
GetTypeString
()
<<
std
::
endl
;
if
(
do_verification
)
if
(
do_verification
)
{
{
...
...
Prev
1
2
3
4
5
…
30
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment