Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
ccaea50e
Commit
ccaea50e
authored
Mar 08, 2024
by
Jing Zhang
Browse files
merge navi31_rel
parents
0b914465
10127959
Changes
126
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
127 additions
and
78 deletions
+127
-78
.github/CODEOWNERS
.github/CODEOWNERS
+6
-6
Jenkinsfile
Jenkinsfile
+92
-45
client_example/01_gemm/gemm.cpp
client_example/01_gemm/gemm.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
...xample/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
...2_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
...nt_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
...le/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
...xample/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
+2
-2
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
...xample/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
+4
-2
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
...03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
+2
-2
client_example/04_contraction/contraction_bilinear_fp32.cpp
client_example/04_contraction/contraction_bilinear_fp32.cpp
+1
-1
client_example/04_contraction/contraction_bilinear_fp64.cpp
client_example/04_contraction/contraction_bilinear_fp64.cpp
+1
-1
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
...mple/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp32.cpp
client_example/04_contraction/contraction_scale_fp32.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp64.cpp
client_example/04_contraction/contraction_scale_fp64.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_data.cpp
client_example/05_layernorm/layernorm2d_bwd_data.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
+1
-1
client_example/05_layernorm/layernorm2d_fwd.cpp
client_example/05_layernorm/layernorm2d_fwd.cpp
+1
-1
client_example/05_layernorm/layernorm4d_fwd.cpp
client_example/05_layernorm/layernorm4d_fwd.cpp
+1
-1
No files found.
.github/CODEOWNERS
View file @
ccaea50e
* @zjing14
@asroy
@junliume @illsilin @carlushuang @aosewski
* @zjing14 @junliume @illsilin @carlushuang @aosewski
# Documentation files
# Documentation files
docs/* @
saadrahim @LisaDelaney
docs/* @
ROCm/rocm-documentation
*.md
@saadrahim @LisaDelaney
*.md
@ROCm/rocm-documentation
*.rst
@saadrahim @LisaDelaney
*.rst
@ROCm/rocm-documentation
# Header directory
# Header directory
for Doxygen documentation
library/include/*
@saadrahim @LisaDelaney
library/include/*
@ROCm/rocm-documentation
Jenkinsfile
View file @
ccaea50e
def
rocmnode
(
name
)
{
def
rocmnode
(
name
)
{
return
'(rocmtest || miopen) && '
+
name
return
'(rocmtest || miopen) &&
(
'
+
name
+
')'
}
}
def
show_node_info
()
{
def
show_node_info
()
{
...
@@ -7,6 +7,7 @@ def show_node_info() {
...
@@ -7,6 +7,7 @@ def show_node_info() {
echo "NODE_NAME = \$NODE_NAME"
echo "NODE_NAME = \$NODE_NAME"
lsb_release -sd
lsb_release -sd
uname -r
uname -r
cat /sys/module/amdgpu/version
ls /opt/ -la
ls /opt/ -la
"""
"""
}
}
...
@@ -33,6 +34,10 @@ def runShell(String command){
...
@@ -33,6 +34,10 @@ def runShell(String command){
def
getDockerImageName
(){
def
getDockerImageName
(){
def
img
def
img
if
(
params
.
USE_CUSTOM_DOCKER
!=
""
){
img
=
"${params.USE_CUSTOM_DOCKER}"
}
else
{
if
(
params
.
ROCMVERSION
!=
"6.0.1"
){
if
(
params
.
ROCMVERSION
!=
"6.0.1"
){
if
(
params
.
COMPILER_VERSION
==
""
)
{
if
(
params
.
COMPILER_VERSION
==
""
)
{
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
...
@@ -61,6 +66,7 @@ def getDockerImageName(){
...
@@ -61,6 +66,7 @@ def getDockerImageName(){
}
}
}
}
}
}
}
return
img
return
img
}
}
...
@@ -258,18 +264,24 @@ def cmake_build(Map conf=[:]){
...
@@ -258,18 +264,24 @@ def cmake_build(Map conf=[:]){
"""
)
"""
)
sh
cmd3
sh
cmd3
}
}
def
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
// reduce parallelism when compiling, clang uses too much memory
// reduce parallelism when compiling, clang uses too much memory
def
nt
=
nthreads
()
def
nt
=
nthreads
()
def
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
def
cmd
def
execute_cmd
=
conf
.
get
(
"execute_cmd"
,
""
)
def
execute_cmd
=
conf
.
get
(
"execute_cmd"
,
""
)
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
def
cmd
=
conf
.
get
(
"cmd"
,
"""
def
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
def
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} dumb-init make -j${nt} ${config_targets}"
)
cmd
=
conf
.
get
(
"cmd"
,
"""
${setup_cmd}
${setup_cmd}
${build_cmd}
${build_cmd}
${execute_cmd}
${execute_cmd}
"""
)
"""
)
}
else
{
cmd
=
conf
.
get
(
"cmd"
,
"""
${execute_cmd}
"""
)
}
echo
cmd
echo
cmd
...
@@ -365,8 +377,8 @@ def runCKProfiler(Map conf=[:]){
...
@@ -365,8 +377,8 @@ def runCKProfiler(Map conf=[:]){
(
retimage
,
image
)
=
getDockerImage
(
conf
)
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'
PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" cl
info | tee
cl
info.log'
sh
'
rocm
info | tee
rocm
info.log'
if
(
runShell
(
'grep -n "
Number of devices:.*. 0" cl
info.log'
)
){
if
(
!
runShell
(
'grep -n "
gfx" rocm
info.log'
)
){
throw
new
Exception
(
"GPU not found"
)
throw
new
Exception
(
"GPU not found"
)
}
}
else
{
else
{
...
@@ -379,20 +391,6 @@ def runCKProfiler(Map conf=[:]){
...
@@ -379,20 +391,6 @@ def runCKProfiler(Map conf=[:]){
echo
"The job was cancelled or aborted"
echo
"The job was cancelled or aborted"
throw
e
throw
e
}
}
catch
(
Exception
ex
)
{
retimage
=
docker
.
build
(
"${image}"
,
dockerArgs
+
" --no-cache ."
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if
(
runShell
(
'grep -n "Number of devices:.*. 0" clinfo.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
}
}
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
24
,
unit:
'HOURS'
)
timeout
(
time:
24
,
unit:
'HOURS'
)
...
@@ -473,6 +471,7 @@ def Build_CK(Map conf=[:]){
...
@@ -473,6 +471,7 @@ def Build_CK(Map conf=[:]){
show_node_info
()
show_node_info
()
env
.
HSA_ENABLE_SDMA
=
0
env
.
HSA_ENABLE_SDMA
=
0
env
.
DOCKER_BUILDKIT
=
1
checkout
scm
checkout
scm
def
image
=
getDockerImageName
()
def
image
=
getDockerImageName
()
...
@@ -487,25 +486,35 @@ def Build_CK(Map conf=[:]){
...
@@ -487,25 +486,35 @@ def Build_CK(Map conf=[:]){
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
}
def
video_id
=
sh
(
returnStdout:
true
,
script:
'getent group video | cut -d: -f3'
)
def
render_id
=
sh
(
returnStdout:
true
,
script:
'getent group render | cut -d: -f3'
)
dockerOpts
=
dockerOpts
+
" --group-add=${video_id} --group-add=${render_id} "
echo
"Docker flags: ${dockerOpts}"
def
variant
=
env
.
STAGE_NAME
def
variant
=
env
.
STAGE_NAME
def
retimage
def
retimage
def
navi_node
=
0
def
navi_node
=
0
def
mi300_node
=
0
gitStatusWrapper
(
credentialsId:
"${status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
gitStatusWrapper
(
credentialsId:
"${status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel-internal'
)
{
try
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'
PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" cl
info | tee
cl
info.log'
sh
'
rocm
info | tee
rocm
info.log'
if
(
runShell
(
'grep -n "
Number of devices:.*. 0" cl
info.log'
)
){
if
(
!
runShell
(
'grep -n "
gfx" rocm
info.log'
)
){
throw
new
Exception
(
"GPU not found"
)
throw
new
Exception
(
"GPU not found"
)
}
}
else
{
else
{
echo
"GPU is OK"
echo
"GPU is OK"
}
}
if
(
runShell
(
'grep -n "gfx1030"
cl
info.log'
)
||
runShell
(
'grep -n "gfx1101"
cl
info.log'
)
){
if
(
runShell
(
'grep -n "gfx1030"
rocm
info.log'
)
||
runShell
(
'grep -n "gfx1101"
rocm
info.log'
)
){
navi_node
=
1
navi_node
=
1
echo
"This is a Navi node"
}
if
(
runShell
(
'grep -n "gfx942" rocminfo.log'
)
){
mi300_node
=
1
echo
"This is MI300 node"
}
}
}
}
}
}
...
@@ -514,23 +523,6 @@ def Build_CK(Map conf=[:]){
...
@@ -514,23 +523,6 @@ def Build_CK(Map conf=[:]){
echo
"The job was cancelled or aborted"
echo
"The job was cancelled or aborted"
throw
e
throw
e
}
}
catch
(
Exception
ex
)
{
retimage
=
docker
.
build
(
"${image}"
,
dockerArgs
+
" --no-cache ."
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
if
(
runShell
(
'grep -n "Number of devices:.*. 0" clinfo.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
if
(
runShell
(
'grep -n "gfx1030" clinfo.log'
)
||
runShell
(
'grep -n "gfx1101" clinfo.log'
)
){
navi_node
=
1
}
}
}
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
24
,
unit:
'HOURS'
)
timeout
(
time:
24
,
unit:
'HOURS'
)
{
{
...
@@ -544,8 +536,8 @@ def Build_CK(Map conf=[:]){
...
@@ -544,8 +536,8 @@ def Build_CK(Map conf=[:]){
sh
'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
sh
'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
stash
"ckProfiler.tar.gz"
stash
"ckProfiler.tar.gz"
}
}
if
(
params
.
RUN_FULL_QA
){
if
(
params
.
RUN_FULL_QA
&&
mi300_node
==
0
){
// build deb packages
// build deb packages
for all MI100/200/300 targets and prepare to export
sh
'make -j package'
sh
'make -j package'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
archiveArtifacts
artifacts:
'composablekernel-tests_*.deb'
archiveArtifacts
artifacts:
'composablekernel-tests_*.deb'
...
@@ -669,6 +661,10 @@ pipeline {
...
@@ -669,6 +661,10 @@ pipeline {
name:
"BUILD_DOCKER"
,
name:
"BUILD_DOCKER"
,
defaultValue:
false
,
defaultValue:
false
,
description:
"Force building docker image (default: false), set to true if docker image needs to be updated."
)
description:
"Force building docker image (default: false), set to true if docker image needs to be updated."
)
string
(
name:
'USE_CUSTOM_DOCKER'
,
defaultValue:
''
,
description:
'If you want to use a custom docker image, please specify it here (default: leave blank).'
)
string
(
string
(
name:
'ROCMVERSION'
,
name:
'ROCMVERSION'
,
defaultValue:
'6.0'
,
defaultValue:
'6.0'
,
...
@@ -713,6 +709,10 @@ pipeline {
...
@@ -713,6 +709,10 @@ pipeline {
name:
"RUN_PERFORMANCE_TESTS"
,
name:
"RUN_PERFORMANCE_TESTS"
,
defaultValue:
false
,
defaultValue:
false
,
description:
"Run the performance tests (default: OFF)"
)
description:
"Run the performance tests (default: OFF)"
)
booleanParam
(
name:
"RUN_CODEGEN_TESTS"
,
defaultValue:
true
,
description:
"Run the codegen tests (default: ON)"
)
}
}
environment
{
environment
{
dbuser
=
"${dbuser}"
dbuser
=
"${dbuser}"
...
@@ -791,7 +791,34 @@ pipeline {
...
@@ -791,7 +791,34 @@ pipeline {
}
}
}
}
}
}
stage
(
"Run Codegen Tests"
)
{
parallel
{
stage
(
"Run Codegen Tests on MI100/MI200"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_CODEGEN_TESTS
.
toBoolean
()
}
}
options
{
retry
(
2
)
}
agent
{
label
rocmnode
(
"gfx908 || gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
execute_args
=
""" cd ../codegen && rm -rf build && mkdir build && cd build && \
cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx908;gfx90a" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j check"""
}
steps
{
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
}
}
stage
(
"Build CK and run Tests"
)
stage
(
"Build CK and run Tests"
)
{
{
parallel
parallel
...
@@ -819,6 +846,26 @@ pipeline {
...
@@ -819,6 +846,26 @@ pipeline {
cleanWs
()
cleanWs
()
}
}
}
}
stage
(
"Build CK and run Tests on MI300"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx942"
)
}
environment
{
setup_args
=
""" -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx942" -DCMAKE_CXX_FLAGS=" -O3 " """
execute_args
=
""" cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
-DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
"install"
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
,
prefixpath:
'/usr/local'
)
cleanWs
()
}
}
stage
(
"Build CK and run Tests on MI100/MI200"
)
stage
(
"Build CK and run Tests on MI100/MI200"
)
{
{
when
{
when
{
...
...
client_example/01_gemm/gemm.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -92,7 +92,7 @@ int main(int argc, char* argv[])
...
@@ -92,7 +92,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -93,7 +93,7 @@ int main(int argc, char* argv[])
...
@@ -93,7 +93,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -88,7 +88,7 @@ int main(int argc, char* argv[])
...
@@ -88,7 +88,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
...
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -84,7 +84,7 @@ int main(int argc, char* argv[])
...
@@ -84,7 +84,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -85,7 +85,7 @@ int main(int argc, char* argv[])
...
@@ -85,7 +85,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -17,6 +17,8 @@
...
@@ -17,6 +17,8 @@
using
F16
=
ck
::
half_t
;
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
F32
=
float
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
ADataType
=
F16
;
using
ADataType
=
F16
;
using
BDataType
=
F16
;
using
BDataType
=
F16
;
using
BiasDataType
=
F32
;
using
BiasDataType
=
F32
;
...
@@ -191,7 +193,7 @@ int main()
...
@@ -191,7 +193,7 @@ int main()
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <iostream>
#include <iostream>
...
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
...
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/04_contraction/contraction_bilinear_fp32.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_bilinear_fp64.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp32.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp64.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/05_layernorm/layernorm2d_bwd_data.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_fwd.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
client_example/05_layernorm/layernorm4d_fwd.cpp
View file @
ccaea50e
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
Prev
1
2
3
4
5
…
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment