Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
8b979efb
Unverified
Commit
8b979efb
authored
Apr 20, 2023
by
rocking5566
Committed by
GitHub
Apr 20, 2023
Browse files
Merge branch 'develop' into normalization/splitK
parents
f973f955
bb0b772d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
110 additions
and
25 deletions
+110
-25
Dockerfile
Dockerfile
+24
-13
Jenkinsfile
Jenkinsfile
+21
-10
client_example/18_groupnorm/groupnorm_swish.cpp
client_example/18_groupnorm/groupnorm_swish.cpp
+2
-2
library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp
...ary/tensor_operation_instance/gpu/normalization_swish.hpp
+12
-0
library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt
...ensor_operation_instance/gpu/normalization/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
...ation/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
+24
-0
library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp
...tance/gpu/normalization/normalization_instance_common.hpp
+26
-0
No files found.
Dockerfile
View file @
8b979efb
FROM
ubuntu:20.04
FROM
ubuntu:20.04
ARG
ROCMVERSION=5.3
ARG
ROCMVERSION=5.
4.
3
ARG
compiler_version="release"
ARG
compiler_version="release"
ARG
compiler_commit=""
ARG
compiler_commit=""
...
@@ -8,23 +8,27 @@ RUN set -xe
...
@@ -8,23 +8,27 @@ RUN set -xe
ARG
DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/.apt_$ROCMVERSION/
ARG
DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/.apt_$ROCMVERSION/
RUN
useradd
-rm
-d
/home/jenkins
-s
/bin/bash
-u
1004 jenkins
RUN
useradd
-rm
-d
/home/jenkins
-s
/bin/bash
-u
1004 jenkins
RUN
useradd
-rm
-d
/home/manitera
-s
/bin/bash
-u
1002 manitera
# Add rocm repository
# Add rocm repository
RUN
apt-get update
RUN
apt-get update
RUN
apt-get
install
-y
wget gnupg
RUN
apt-get
install
-y
wget gnupg curl
RUN
wget
-qO
- http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
RUN
--mount
=
type
=
ssh
if
[
"
$ROCMVERSION
"
!=
"5.5"
]
;
then
\
wget
-qO
- http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
;
\
else
sh
-c
"wget http://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/amd-nonfree-radeon_20.04-1_all.deb"
&&
\
apt update
&&
apt-get
install
-y
./amd-nonfree-radeon_20.04-1_all.deb
&&
\
sh
-c
'echo deb [arch=amd64 trusted=yes] http://compute-artifactory.amd.com/artifactory/list/rocm-release-archive-20.04-deb/ 5.5 rel-50 > /etc/apt/sources.list.d/rocm-build.list'
&&
\
amdgpu-repo
--amdgpu-build
=
1558725
&&
DEBIAN_FRONTEND
=
noninteractive amdgpu-install
-y
--usecase
=
rocm
;
\
fi
RUN
sh
-c
"echo deb [arch=amd64]
$DEB_ROCM_REPO
ubuntu main > /etc/apt/sources.list.d/rocm.list"
RUN
sh
-c
"echo deb [arch=amd64]
$DEB_ROCM_REPO
ubuntu main > /etc/apt/sources.list.d/rocm.list"
RUN
wget
--no-check-certificate
-qO
- https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN
wget
--no-check-certificate
-qO
- https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN
sh
-c
"echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
RUN
sh
-c
"echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
RUN
curl
-fsSL
https://repo.radeon.com/rocm/rocm.gpg.key | gpg
--dearmor
-o
/etc/apt/trusted.gpg.d/rocm-keyring.gpg
# Install dependencies
# Install dependencies
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
apt-utils
\
apt-utils
\
build-essential
\
build-essential
\
ccache
\
ccache
\
cmake-data
\
cmake
\
cmake
\
curl
\
git
\
git
\
hip-rocclr
\
hip-rocclr
\
jq
\
jq
\
...
@@ -45,6 +49,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -45,6 +49,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
rocm-device-libs
\
rocm-device-libs
\
rocm-cmake
\
rocm-cmake
\
vim
\
vim
\
nano
\
zlib1g-dev
\
zlib1g-dev
\
openssh-server
\
openssh-server
\
clang-format-10
\
clang-format-10
\
...
@@ -52,6 +57,17 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -52,6 +57,17 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
apt-get clean
&&
\
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
rm
-rf
/var/lib/apt/lists/
*
#Install latest version of cmake
RUN
apt purge
--auto-remove
-y
cmake
RUN
apt update
RUN
apt
install
-y
software-properties-common lsb-release
RUN
apt clean all
RUN
wget
-O
- https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg
--dearmor
- |
tee
/etc/apt/trusted.gpg.d/kitware.gpg
>
/dev/null
RUN
apt-add-repository
"deb https://apt.kitware.com/ubuntu/
$(
lsb_release
-cs
)
main"
RUN
apt
install
-y
kitware-archive-keyring
RUN
rm
/etc/apt/trusted.gpg.d/kitware.gpg
RUN
apt
install
-y
cmake
# Setup ubsan environment to printstacktrace
# Setup ubsan environment to printstacktrace
RUN
ln
-s
/usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer
RUN
ln
-s
/usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer
ENV
UBSAN_OPTIONS=print_stacktrace=1
ENV
UBSAN_OPTIONS=print_stacktrace=1
...
@@ -87,12 +103,7 @@ ENV compiler_commit=$compiler_commit
...
@@ -87,12 +103,7 @@ ENV compiler_commit=$compiler_commit
RUN
sh
-c
"echo compiler version = '
$compiler_version
'"
RUN
sh
-c
"echo compiler version = '
$compiler_version
'"
RUN
sh
-c
"echo compiler commit = '
$compiler_commit
'"
RUN
sh
-c
"echo compiler commit = '
$compiler_commit
'"
RUN
--mount
=
type
=
ssh
if
[
"
$compiler_version
"
=
"amd-stg-open"
]
;
then
\
RUN
--mount
=
type
=
ssh
if
[
"
$compiler_version
"
!=
"release"
]
&&
[
"
$compiler_version
"
!=
~ ^
"rc"
]
&&
[
"
$compiler_commit
"
=
""
]
;
then
\
sed
-i
'/$HIP_CLANG_TARGET = chomp($HIP_CLANG_TARGET);/c\ chomp($HIP_CLANG_TARGET);'
/opt/rocm/hip/bin/hipcc.pl
&&
\
sed
-i
'/$HIP_CLANG_TARGET = chomp($HIP_CLANG_TARGET);/c\ chomp($HIP_CLANG_TARGET);'
/opt/rocm/bin/hipcc.pl
;
\
fi
RUN
--mount
=
type
=
ssh
if
[
"
$compiler_version
"
!=
"release"
]
&&
[
"
$compiler_commit
"
=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/RadeonOpenCompute/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/RadeonOpenCompute/llvm-project.git
&&
\
cd
llvm-project
&&
mkdir
build
&&
cd
build
&&
\
cd
llvm-project
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld;compiler-rt"
../llvm
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld;compiler-rt"
../llvm
&&
\
...
@@ -100,7 +111,7 @@ RUN --mount=type=ssh if [ "$compiler_version" != "release" ] && [ "$compiler_com
...
@@ -100,7 +111,7 @@ RUN --mount=type=ssh if [ "$compiler_version" != "release" ] && [ "$compiler_com
else
echo
"using the release compiler"
;
\
else
echo
"using the release compiler"
;
\
fi
fi
RUN
--mount
=
type
=
ssh
if
[
"
$compiler_version
"
!=
"release"
]
&&
[
"
$compiler_commit
"
!=
""
]
;
then
\
RUN
--mount
=
type
=
ssh
if
[
"
$compiler_version
"
!=
"release"
]
&&
[
"
$compiler_version
"
!=
~ ^
"rc"
]
&&
[
"
$compiler_commit
"
!=
""
]
;
then
\
git clone
-b
"
$compiler_version
"
https://github.com/RadeonOpenCompute/llvm-project.git
&&
\
git clone
-b
"
$compiler_version
"
https://github.com/RadeonOpenCompute/llvm-project.git
&&
\
cd
llvm-project
&&
git checkout
"
$compiler_commit
"
&&
echo
"checking out commit
$compiler_commit
"
&&
mkdir
build
&&
cd
build
&&
\
cd
llvm-project
&&
git checkout
"
$compiler_commit
"
&&
echo
"checking out commit
$compiler_commit
"
&&
mkdir
build
&&
cd
build
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld;compiler-rt"
../llvm
&&
\
cmake
-DCMAKE_INSTALL_PREFIX
=
/opt/rocm/llvm
-DCMAKE_BUILD_TYPE
=
Release
-DLLVM_ENABLE_ASSERTIONS
=
1
-DLLVM_TARGETS_TO_BUILD
=
"AMDGPU;X86"
-DLLVM_ENABLE_PROJECTS
=
"clang;lld;compiler-rt"
../llvm
&&
\
...
...
Jenkinsfile
View file @
8b979efb
...
@@ -19,12 +19,23 @@ def runShell(String command){
...
@@ -19,12 +19,23 @@ def runShell(String command){
def
getDockerImageName
(){
def
getDockerImageName
(){
def
img
def
img
if
(
params
.
COMPILER_COMMIT
==
""
){
if
(
params
.
ROCMVERSION
!=
"5.5"
){
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}"
if
(
params
.
COMPILER_COMMIT
==
""
){
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}"
}
else
{
def
commit
=
"${params.COMPILER_COMMIT}"
[
0
..
6
]
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}_${commit}"
}
}
}
else
{
else
{
def
commit
=
"${params.COMPILER_COMMIT}"
[
0
..
6
]
if
(
params
.
COMPILER_COMMIT
==
""
){
img
=
"${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}_${commit}"
img
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}"
}
else
{
def
commit
=
"${params.COMPILER_COMMIT}"
[
0
..
6
]
img
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_ub20.04_rocm${params.ROCMVERSION}_${params.COMPILER_VERSION}_${commit}"
}
}
}
return
img
return
img
}
}
...
@@ -49,11 +60,11 @@ def build_compiler(){
...
@@ -49,11 +60,11 @@ def build_compiler(){
compiler
=
'/opt/rocm/bin/hipcc'
compiler
=
'/opt/rocm/bin/hipcc'
}
}
else
{
else
{
if
(
params
.
COMPILER_VERSION
==
"
release
"
){
if
(
params
.
COMPILER_VERSION
==
"
amd-stg-open"
||
params
.
COMPILER_COMMIT
!=
"
"
){
compiler
=
"/
opt/rocm/llvm
/bin/clang++"
compiler
=
"/
llvm-project/build
/bin/clang++"
}
}
else
{
else
{
compiler
=
"/
llvm-project/build
/bin/clang++"
compiler
=
"/
opt/rocm/llvm
/bin/clang++"
}
}
}
}
return
compiler
return
compiler
...
@@ -232,7 +243,7 @@ def buildHipClangJob(Map conf=[:]){
...
@@ -232,7 +243,7 @@ def buildHipClangJob(Map conf=[:]){
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
}
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
if
(
params
.
COMPILER_VERSION
!
=
"
release
"
){
if
(
params
.
COMPILER_VERSION
=
=
"
amd-stg-open"
||
params
.
COMPILER_COMMIT
!=
"
"
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
}
...
@@ -287,7 +298,7 @@ def runCKProfiler(Map conf=[:]){
...
@@ -287,7 +298,7 @@ def runCKProfiler(Map conf=[:]){
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
}
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
if
(
params
.
COMPILER_VERSION
!
=
"
release
"
){
if
(
params
.
COMPILER_VERSION
=
=
"
amd-stg-open"
||
params
.
COMPILER_COMMIT
!=
"
"
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
}
...
@@ -420,7 +431,7 @@ def Build_CK(Map conf=[:]){
...
@@ -420,7 +431,7 @@ def Build_CK(Map conf=[:]){
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
}
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
if
(
params
.
COMPILER_VERSION
!
=
"
release
"
){
if
(
params
.
COMPILER_VERSION
=
=
"
amd-stg-open"
||
params
.
COMPILER_COMMIT
!=
"
"
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
}
...
...
client_example/18_groupnorm/groupnorm_swish.cpp
View file @
8b979efb
...
@@ -13,8 +13,8 @@
...
@@ -13,8 +13,8 @@
#include "ck/library/tensor_operation_instance/gpu/normalization_swish.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization_swish.hpp"
using
XDataType
=
ck
::
half_t
;
using
XDataType
=
ck
::
half_t
;
using
GammaDataType
=
ck
::
half_
t
;
using
GammaDataType
=
floa
t
;
using
BetaDataType
=
ck
::
half_
t
;
using
BetaDataType
=
floa
t
;
using
YDataType
=
ck
::
half_t
;
using
YDataType
=
ck
::
half_t
;
using
ComputeDataType
=
float
;
using
ComputeDataType
=
float
;
using
Swish
=
ck
::
tensor_operation
::
element_wise
::
Swish
;
using
Swish
=
ck
::
tensor_operation
::
element_wise
::
Swish
;
...
...
library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp
View file @
8b979efb
...
@@ -25,6 +25,10 @@ void add_device_normalization_rank_5_3_swish_f16_instances(
...
@@ -25,6 +25,10 @@ void add_device_normalization_rank_5_3_swish_f16_instances(
void
add_device_normalization_rank_5_3_swish_f32_instances
(
void
add_device_normalization_rank_5_3_swish_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F32
,
F32
,
F32
,
F32
,
F32
,
Swish
,
5
,
3
>>>&
);
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F32
,
F32
,
F32
,
F32
,
F32
,
Swish
,
5
,
3
>>>&
);
// [x, gamma, beta, y] = [f16, f32, f32, f16]
void
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F16
,
F32
,
F32
,
F32
,
F16
,
Swish
,
5
,
3
>>>&
);
template
<
typename
XDataType
,
template
<
typename
XDataType
,
typename
GammaDataType
,
typename
GammaDataType
,
typename
BetaDataType
,
typename
BetaDataType
,
...
@@ -70,6 +74,14 @@ struct DeviceOperationInstanceFactory<
...
@@ -70,6 +74,14 @@ struct DeviceOperationInstanceFactory<
add_device_normalization_rank_5_3_swish_f32_instances
(
op_ptrs
);
add_device_normalization_rank_5_3_swish_f32_instances
(
op_ptrs
);
}
}
}
}
else
if
constexpr
(
is_same_v
<
XDataType
,
F16
>
&&
is_same_v
<
GammaDataType
,
F32
>
&&
is_same_v
<
BetaDataType
,
F32
>
&&
is_same_v
<
YDataType
,
F16
>
)
{
if
constexpr
(
Rank
==
5
&&
NumReduceDim
==
3
)
{
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
op_ptrs
);
}
}
return
op_ptrs
;
return
op_ptrs
;
}
}
...
...
library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt
View file @
8b979efb
...
@@ -7,4 +7,5 @@ add_instance_library(device_normalization_instance
...
@@ -7,4 +7,5 @@ add_instance_library(device_normalization_instance
device_groupnorm_f32_instance.cpp
device_groupnorm_f32_instance.cpp
device_groupnorm_swish_f16_instance.cpp
device_groupnorm_swish_f16_instance.cpp
device_groupnorm_swish_f32_instance.cpp
device_groupnorm_swish_f32_instance.cpp
device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
)
)
library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
0 → 100644
View file @
8b979efb
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
using
Swish
=
ck
::
tensor_operation
::
element_wise
::
Swish
;
void
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F16
,
F32
,
F32
,
F32
,
F16
,
Swish
,
5
,
3
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_normalization_f16_f32_f32_f16_instances
<
Swish
,
5
,
3
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp
View file @
8b979efb
...
@@ -69,6 +69,32 @@ using device_normalization_f32_instances = std::tuple<
...
@@ -69,6 +69,32 @@ using device_normalization_f32_instances = std::tuple<
// clang-format on
// clang-format on
>
;
>
;
template
<
typename
OutElementwise
,
index_t
Rank
,
index_t
Reduce
>
using
device_normalization_f16_f32_f32_f16_instances
=
std
::
tuple
<
// clang-format off
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
2
,
1
,
2
,
1
,
2
,
1
,
2
,
2
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
32
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
2
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
32
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
2
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
// clang-format on
>
;
}
// namespace instance
}
// namespace instance
}
// namespace device
}
// namespace device
}
// namespace tensor_operation
}
// namespace tensor_operation
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment