Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6ac1d6a2
Commit
6ac1d6a2
authored
Mar 11, 2024
by
illsilin
Browse files
merging from public repo
parents
e60c5aea
42fc8edd
Changes
303
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
141 additions
and
85 deletions
+141
-85
.github/CODEOWNERS
.github/CODEOWNERS
+6
-6
Dockerfile
Dockerfile
+7
-4
Jenkinsfile
Jenkinsfile
+100
-49
client_example/01_gemm/gemm.cpp
client_example/01_gemm/gemm.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
...xample/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
...2_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
...nt_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
...le/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
...xample/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
+2
-2
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
...xample/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
+4
-2
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
...03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
+2
-2
client_example/04_contraction/contraction_bilinear_fp32.cpp
client_example/04_contraction/contraction_bilinear_fp32.cpp
+1
-1
client_example/04_contraction/contraction_bilinear_fp64.cpp
client_example/04_contraction/contraction_bilinear_fp64.cpp
+1
-1
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
...mple/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp32.cpp
client_example/04_contraction/contraction_scale_fp32.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp64.cpp
client_example/04_contraction/contraction_scale_fp64.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_data.cpp
client_example/05_layernorm/layernorm2d_bwd_data.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
+1
-1
client_example/05_layernorm/layernorm2d_fwd.cpp
client_example/05_layernorm/layernorm2d_fwd.cpp
+1
-1
No files found.
.github/CODEOWNERS
View file @
6ac1d6a2
* @zjing14
@asroy
@junliume @illsilin @carlushuang @aosewski
* @zjing14 @junliume @illsilin @carlushuang @aosewski
# Documentation files
docs/* @
saadrahim @LisaDelaney
*.md
@saadrahim @LisaDelaney
*.rst
@saadrahim @LisaDelaney
# Header directory
library/include/*
@saadrahim @LisaDelaney
docs/* @
ROCm/rocm-documentation
*.md
@ROCm/rocm-documentation
*.rst
@ROCm/rocm-documentation
# Header directory
for Doxygen documentation
library/include/*
@ROCm/rocm-documentation
Dockerfile
View file @
6ac1d6a2
...
...
@@ -44,7 +44,6 @@ ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
build-essential
\
cmake
\
ccache
\
git
\
hip-rocclr
\
iputils-ping
\
...
...
@@ -74,6 +73,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
#Install latest ccache
RUN
git clone https://github.com/ccache/ccache.git
&&
\
cd
ccache
&&
mkdir
build
&&
cd
build
&&
cmake ..
&&
make
install
#Install ninja build tracing tools
RUN
wget
-qO
/usr/local/bin/ninja.gz https://github.com/ninja-build/ninja/releases/latest/download/ninja-linux.zip
RUN
gunzip
/usr/local/bin/ninja.gz
...
...
@@ -111,7 +114,7 @@ ENV LANG=C.UTF-8
RUN
groupadd
-f
render
# Install the new rocm-cmake version
RUN
git clone
-b
master https://github.com/R
adeonOpenCompute
/rocm-cmake.git
&&
\
RUN
git clone
-b
master https://github.com/R
OCm
/rocm-cmake.git
&&
\
cd
rocm-cmake
&&
mkdir
build
&&
cd
build
&&
\
cmake ..
&&
cmake
--build
.
&&
cmake
--build
.
--target
install
...
...
@@ -123,7 +126,7 @@ RUN sh -c "echo compiler version = '$compiler_version'"
RUN
sh
-c
"echo compiler commit = '
$compiler_commit
'"
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/R
adeonOpenCompute
/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/R
OCm
/llvm-project.git
&&
\
cd
llvm-project
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
make
-j
8
;
\
...
...
@@ -131,7 +134,7 @@ RUN if ( [ "$compiler_version" = "amd-staging" ] || [ "$compiler_version" = "amd
fi
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
!=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/R
adeonOpenCompute
/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/R
OCm
/llvm-project.git
&&
\
cd
llvm-project
&&
git checkout
"
$compiler_commit
"
&&
echo
"checking out commit
$compiler_commit
"
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
make
-j
8
;
\
...
...
Jenkinsfile
View file @
6ac1d6a2
def
rocmnode
(
name
)
{
return
'(rocmtest || miopen) && '
+
name
return
'(rocmtest || miopen) &&
(
'
+
name
+
')'
}
def
show_node_info
()
{
...
...
@@ -7,6 +7,7 @@ def show_node_info() {
echo "NODE_NAME = \$NODE_NAME"
lsb_release -sd
uname -r
cat /sys/module/amdgpu/version
ls /opt/ -la
"""
}
...
...
@@ -33,6 +34,10 @@ def runShell(String command){
def
getDockerImageName
(){
def
img
if
(
params
.
USE_CUSTOM_DOCKER
!=
""
){
img
=
"${params.USE_CUSTOM_DOCKER}"
}
else
{
if
(
params
.
ROCMVERSION
!=
"6.0.1"
){
if
(
params
.
COMPILER_VERSION
==
""
)
{
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
...
...
@@ -61,6 +66,7 @@ def getDockerImageName(){
}
}
}
}
return
img
}
...
...
@@ -134,7 +140,9 @@ def buildDocker(install_prefix){
//force building the new docker if that parameter is true
echo
"Building image: ${image_name}"
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
.
push
()
withDockerRegistry
([
credentialsId:
"docker_test_cred"
,
url:
""
])
{
retimage
.
push
()
}
sh
'docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi'
}
else
{
...
...
@@ -146,7 +154,9 @@ def buildDocker(install_prefix){
catch
(
Exception
ex
){
echo
"Unable to locate image: ${image_name}. Building image now"
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
.
push
()
withDockerRegistry
([
credentialsId:
"docker_test_cred"
,
url:
""
])
{
retimage
.
push
()
}
}
}
...
...
@@ -254,18 +264,24 @@ def cmake_build(Map conf=[:]){
"""
)
sh
cmd3
}
def
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
// reduce parallelism when compiling, clang uses too much memory
def
nt
=
nthreads
()
def
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
def
cmd
def
execute_cmd
=
conf
.
get
(
"execute_cmd"
,
""
)
def
cmd
=
conf
.
get
(
"cmd"
,
"""
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
def
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
def
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
cmd
=
conf
.
get
(
"cmd"
,
"""
${setup_cmd}
${build_cmd}
${execute_cmd}
"""
)
}
else
{
cmd
=
conf
.
get
(
"cmd"
,
"""
${execute_cmd}
"""
)
}
echo
cmd
...
...
@@ -361,8 +377,8 @@ def runCKProfiler(Map conf=[:]){
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'
PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" cl
info | tee
cl
info.log'
if
(
runShell
(
'grep -n "
Number of devices:.*. 0" cl
info.log'
)
){
sh
'
rocm
info | tee
rocm
info.log'
if
(
!
runShell
(
'grep -n "
gfx" rocm
info.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
...
...
@@ -375,20 +391,6 @@ def runCKProfiler(Map conf=[:]){
echo
"The job was cancelled or aborted"
throw
e
}
catch
(
Exception
ex
)
{
retimage
=
docker
.
build
(
"${image}"
,
dockerArgs
+
" --no-cache ."
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if
(
runShell
(
'grep -n "Number of devices:.*. 0" clinfo.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
}
}
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
24
,
unit:
'HOURS'
)
...
...
@@ -469,6 +471,7 @@ def Build_CK(Map conf=[:]){
show_node_info
()
env
.
HSA_ENABLE_SDMA
=
0
env
.
DOCKER_BUILDKIT
=
1
checkout
scm
def
image
=
getDockerImageName
()
...
...
@@ -483,25 +486,35 @@ def Build_CK(Map conf=[:]){
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
def
video_id
=
sh
(
returnStdout:
true
,
script:
'getent group video | cut -d: -f3'
)
def
render_id
=
sh
(
returnStdout:
true
,
script:
'getent group render | cut -d: -f3'
)
dockerOpts
=
dockerOpts
+
" --group-add=${video_id} --group-add=${render_id} "
echo
"Docker flags: ${dockerOpts}"
def
variant
=
env
.
STAGE_NAME
def
retimage
def
navi_node
=
0
def
mi300_node
=
0
gitStatusWrapper
(
credentialsId:
"${status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
gitStatusWrapper
(
credentialsId:
"${
env.
status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'
PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" cl
info | tee
cl
info.log'
if
(
runShell
(
'grep -n "
Number of devices:.*. 0" cl
info.log'
)
){
sh
'
rocm
info | tee
rocm
info.log'
if
(
!
runShell
(
'grep -n "
gfx" rocm
info.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
if
(
runShell
(
'grep -n "gfx1030"
cl
info.log'
)
||
runShell
(
'grep -n "gfx1101"
cl
info.log'
)
){
if
(
runShell
(
'grep -n "gfx1030"
rocm
info.log'
)
||
runShell
(
'grep -n "gfx1101"
rocm
info.log'
)
){
navi_node
=
1
echo
"This is a Navi node"
}
if
(
runShell
(
'grep -n "gfx942" rocminfo.log'
)
){
mi300_node
=
1
echo
"This is MI300 node"
}
}
}
...
...
@@ -510,23 +523,6 @@ def Build_CK(Map conf=[:]){
echo
"The job was cancelled or aborted"
throw
e
}
catch
(
Exception
ex
)
{
retimage
=
docker
.
build
(
"${image}"
,
dockerArgs
+
" --no-cache ."
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
if
(
runShell
(
'grep -n "Number of devices:.*. 0" clinfo.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
if
(
runShell
(
'grep -n "gfx1030" clinfo.log'
)
||
runShell
(
'grep -n "gfx1101" clinfo.log'
)
){
navi_node
=
1
}
}
}
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
24
,
unit:
'HOURS'
)
{
...
...
@@ -540,8 +536,8 @@ def Build_CK(Map conf=[:]){
sh
'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
stash
"ckProfiler.tar.gz"
}
if
(
params
.
RUN_FULL_QA
){
// build deb packages
if
(
params
.
RUN_FULL_QA
&&
mi300_node
==
0
){
// build deb packages
for all MI100/200/300 targets and prepare to export
sh
'make -j package'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
archiveArtifacts
artifacts:
'composablekernel-tests_*.deb'
...
...
@@ -606,7 +602,7 @@ def process_results(Map conf=[:]){
def
variant
=
env
.
STAGE_NAME
def
retimage
gitStatusWrapper
(
credentialsId:
"${status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
gitStatusWrapper
(
credentialsId:
"${
env.
status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
}
...
...
@@ -665,6 +661,10 @@ pipeline {
name:
"BUILD_DOCKER"
,
defaultValue:
false
,
description:
"Force building docker image (default: false), set to true if docker image needs to be updated."
)
string
(
name:
'USE_CUSTOM_DOCKER'
,
defaultValue:
''
,
description:
'If you want to use a custom docker image, please specify it here (default: leave blank).'
)
string
(
name:
'ROCMVERSION'
,
defaultValue:
'6.0'
,
...
...
@@ -709,6 +709,10 @@ pipeline {
name:
"RUN_PERFORMANCE_TESTS"
,
defaultValue:
false
,
description:
"Run the performance tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CODEGEN_TESTS"
,
defaultValue:
true
,
description:
"Run the codegen tests (default: ON)"
)
}
environment
{
dbuser
=
"${dbuser}"
...
...
@@ -787,7 +791,34 @@ pipeline {
}
}
}
stage
(
"Run Codegen Tests"
)
{
parallel
{
stage
(
"Run Codegen Tests on MI100/MI200"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_CODEGEN_TESTS
.
toBoolean
()
}
}
options
{
retry
(
2
)
}
agent
{
label
rocmnode
(
"gfx908 || gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
execute_args
=
""" cd ../codegen && rm -rf build && mkdir build && cd build && \
cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx908;gfx90a" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j check"""
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
}
}
stage
(
"Build CK and run Tests"
)
{
parallel
...
...
@@ -815,6 +846,26 @@ pipeline {
cleanWs
()
}
}
stage
(
"Build CK and run Tests on MI300"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx942"
)
}
environment
{
setup_args
=
""" -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx942" -DCMAKE_CXX_FLAGS=" -O3 " """
execute_args
=
""" cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
-DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
"install"
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
,
prefixpath:
'/usr/local'
)
cleanWs
()
}
}
stage
(
"Build CK and run Tests on MI100/MI200"
)
{
when
{
...
...
client_example/01_gemm/gemm.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -92,7 +92,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -93,7 +93,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -88,7 +88,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -84,7 +84,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -85,7 +85,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
@@ -17,6 +17,8 @@
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
ADataType
=
F16
;
using
BDataType
=
F16
;
using
BiasDataType
=
F32
;
...
...
@@ -191,7 +193,7 @@ int main()
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iostream>
...
...
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
...
...
client_example/04_contraction/contraction_bilinear_fp32.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <numeric>
...
...
client_example/04_contraction/contraction_bilinear_fp64.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <numeric>
...
...
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp32.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp64.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <numeric>
...
...
client_example/05_layernorm/layernorm2d_bwd_data.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_fwd.cpp
View file @
6ac1d6a2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
...
...
Prev
1
2
3
4
5
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment