Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
41b920e2
Commit
41b920e2
authored
Mar 12, 2024
by
Jun Liu
Browse files
Merge branch 'amd-develop' into amd-master
parents
874a78f9
5d718e6b
Changes
357
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
43 additions
and
33 deletions
+43
-33
CHANGELOG.md
CHANGELOG.md
+1
-1
Dockerfile
Dockerfile
+7
-4
Jenkinsfile
Jenkinsfile
+7
-2
client_example/01_gemm/gemm.cpp
client_example/01_gemm/gemm.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
...xample/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
...2_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
...nt_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
...le/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
+2
-2
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
...xample/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
+2
-2
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
...xample/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
+4
-2
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
...03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
+2
-2
client_example/04_contraction/contraction_bilinear_fp32.cpp
client_example/04_contraction/contraction_bilinear_fp32.cpp
+1
-1
client_example/04_contraction/contraction_bilinear_fp64.cpp
client_example/04_contraction/contraction_bilinear_fp64.cpp
+1
-1
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
...mple/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp32.cpp
client_example/04_contraction/contraction_scale_fp32.cpp
+1
-1
client_example/04_contraction/contraction_scale_fp64.cpp
client_example/04_contraction/contraction_scale_fp64.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_data.cpp
client_example/05_layernorm/layernorm2d_bwd_data.cpp
+1
-1
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
+1
-1
client_example/05_layernorm/layernorm2d_fwd.cpp
client_example/05_layernorm/layernorm2d_fwd.cpp
+1
-1
No files found.
CHANGELOG.md
View file @
41b920e2
...
@@ -11,7 +11,7 @@ None
...
@@ -11,7 +11,7 @@ None
None
None
### Additions
### Additions
*
Introduced wrapper sublibrary (limited functionality). (#1071, #1098, #1108, #1126)
*
Introduced wrapper sublibrary (limited functionality). (#1071, #1098, #1108, #1126
, #1139
)
### Changes
### Changes
None
None
...
...
Dockerfile
View file @
41b920e2
...
@@ -44,7 +44,6 @@ ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
...
@@ -44,7 +44,6 @@ ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
build-essential
\
build-essential
\
cmake
\
cmake
\
ccache
\
git
\
git
\
hip-rocclr
\
hip-rocclr
\
iputils-ping
\
iputils-ping
\
...
@@ -74,6 +73,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -74,6 +73,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
apt-get clean
&&
\
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
rm
-rf
/var/lib/apt/lists/
*
#Install latest ccache
RUN
git clone https://github.com/ccache/ccache.git
&&
\
cd
ccache
&&
mkdir
build
&&
cd
build
&&
cmake ..
&&
make
install
#Install ninja build tracing tools
#Install ninja build tracing tools
RUN
wget
-qO
/usr/local/bin/ninja.gz https://github.com/ninja-build/ninja/releases/latest/download/ninja-linux.zip
RUN
wget
-qO
/usr/local/bin/ninja.gz https://github.com/ninja-build/ninja/releases/latest/download/ninja-linux.zip
RUN
gunzip
/usr/local/bin/ninja.gz
RUN
gunzip
/usr/local/bin/ninja.gz
...
@@ -111,7 +114,7 @@ ENV LANG=C.UTF-8
...
@@ -111,7 +114,7 @@ ENV LANG=C.UTF-8
RUN
groupadd
-f
render
RUN
groupadd
-f
render
# Install the new rocm-cmake version
# Install the new rocm-cmake version
RUN
git clone
-b
master https://github.com/R
adeonOpenCompute
/rocm-cmake.git
&&
\
RUN
git clone
-b
master https://github.com/R
OCm
/rocm-cmake.git
&&
\
cd
rocm-cmake
&&
mkdir
build
&&
cd
build
&&
\
cd
rocm-cmake
&&
mkdir
build
&&
cd
build
&&
\
cmake ..
&&
cmake
--build
.
&&
cmake
--build
.
--target
install
cmake ..
&&
cmake
--build
.
&&
cmake
--build
.
--target
install
...
@@ -123,7 +126,7 @@ RUN sh -c "echo compiler version = '$compiler_version'"
...
@@ -123,7 +126,7 @@ RUN sh -c "echo compiler version = '$compiler_version'"
RUN
sh
-c
"echo compiler commit = '
$compiler_commit
'"
RUN
sh
-c
"echo compiler commit = '
$compiler_commit
'"
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
=
""
]
;
then
\
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/R
adeonOpenCompute
/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/R
OCm
/llvm-project.git
&&
\
cd
llvm-project
&&
mkdir
build
&&
cd
build
&&
\
cd
llvm-project
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
make
-j
8
;
\
make
-j
8
;
\
...
@@ -131,7 +134,7 @@ RUN if ( [ "$compiler_version" = "amd-staging" ] || [ "$compiler_version" = "amd
...
@@ -131,7 +134,7 @@ RUN if ( [ "$compiler_version" = "amd-staging" ] || [ "$compiler_version" = "amd
fi
fi
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
!=
""
]
;
then
\
RUN if
(
[
"
$compiler_version
"
=
"amd-staging"
]
||
[
"
$compiler_version
"
=
"amd-mainline-open"
]
)
&&
[
"
$compiler_commit
"
!=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/R
adeonOpenCompute
/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/R
OCm
/llvm-project.git
&&
\
cd
llvm-project
&&
git checkout
"
$compiler_commit
"
&&
echo
"checking out commit
$compiler_commit
"
&&
mkdir
build
&&
cd
build
&&
\
cd
llvm-project
&&
git checkout
"
$compiler_commit
"
&&
echo
"checking out commit
$compiler_commit
"
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld"
-DLLVM_ENABLE_RUNTIMES
=
"compiler-rt"
../llvm
&&
\
make
-j
8
;
\
make
-j
8
;
\
...
...
Jenkinsfile
View file @
41b920e2
...
@@ -134,7 +134,10 @@ def buildDocker(install_prefix){
...
@@ -134,7 +134,10 @@ def buildDocker(install_prefix){
//force building the new docker if that parameter is true
//force building the new docker if that parameter is true
echo
"Building image: ${image_name}"
echo
"Building image: ${image_name}"
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
.
push
()
withDockerRegistry
([
credentialsId:
"docker_test_cred"
,
url:
""
])
{
retimage
.
push
()
}
sh
'docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi'
}
}
else
{
else
{
echo
"Checking for image: ${image_name}"
echo
"Checking for image: ${image_name}"
...
@@ -145,7 +148,9 @@ def buildDocker(install_prefix){
...
@@ -145,7 +148,9 @@ def buildDocker(install_prefix){
catch
(
Exception
ex
){
catch
(
Exception
ex
){
echo
"Unable to locate image: ${image_name}. Building image now"
echo
"Unable to locate image: ${image_name}. Building image now"
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
=
docker
.
build
(
"${image_name}"
,
dockerArgs
+
' .'
)
retimage
.
push
()
withDockerRegistry
([
credentialsId:
"docker_test_cred"
,
url:
""
])
{
retimage
.
push
()
}
}
}
}
}
...
...
client_example/01_gemm/gemm.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -92,7 +92,7 @@ int main(int argc, char* argv[])
...
@@ -92,7 +92,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -93,7 +93,7 @@ int main(int argc, char* argv[])
...
@@ -93,7 +93,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -88,7 +88,7 @@ int main(int argc, char* argv[])
...
@@ -88,7 +88,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
...
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -84,7 +84,7 @@ int main(int argc, char* argv[])
...
@@ -84,7 +84,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -85,7 +85,7 @@ int main(int argc, char* argv[])
...
@@ -85,7 +85,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
@@ -17,6 +17,8 @@
...
@@ -17,6 +17,8 @@
using
F16
=
ck
::
half_t
;
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
F32
=
float
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
ADataType
=
F16
;
using
ADataType
=
F16
;
using
BDataType
=
F16
;
using
BDataType
=
F16
;
using
BiasDataType
=
F32
;
using
BiasDataType
=
F32
;
...
@@ -191,7 +193,7 @@ int main()
...
@@ -191,7 +193,7 @@ int main()
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <iostream>
#include <iostream>
...
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
...
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
[](
std
::
size_t
nRow
,
std
::
size_t
nCol
,
std
::
size_t
stride
,
auto
layout
)
{
using
Layout
=
decltype
(
layout
);
using
Layout
=
decltype
(
layout
);
if
constexpr
(
std
::
is_same
<
Layout
,
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
if
constexpr
(
std
::
is_same
<
Layout
,
Row
>::
value
)
{
{
return
(
nRow
-
1
)
*
stride
+
nCol
;
return
(
nRow
-
1
)
*
stride
+
nCol
;
}
}
...
...
client_example/04_contraction/contraction_bilinear_fp32.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_bilinear_fp64.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp32.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/04_contraction/contraction_scale_fp64.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <numeric>
#include <numeric>
...
...
client_example/05_layernorm/layernorm2d_bwd_data.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
client_example/05_layernorm/layernorm2d_fwd.cpp
View file @
41b920e2
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <iomanip>
#include <vector>
#include <vector>
...
...
Prev
1
2
3
4
5
…
18
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment