Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a5abe1ad
"vscode:/vscode.git/clone" did not exist on "3ddfeff4f59a38a8c6b047a6eea3099215a10782"
Unverified
Commit
a5abe1ad
authored
Apr 17, 2023
by
zjing14
Committed by
GitHub
Apr 17, 2023
Browse files
Merge branch 'develop' into aosewski/ggemm_splitk
parents
0b7a77c2
fd11a4a1
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
122 additions
and
29 deletions
+122
-29
.github/dependabot.yml
.github/dependabot.yml
+12
-0
client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
...tization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
+2
-2
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
...tization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
+2
-2
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
...le/09_quantization/conv2d_fwd_perchannel_quantization.cpp
+2
-2
client_example/18_groupnorm/groupnorm_swish.cpp
client_example/18_groupnorm/groupnorm_swish.cpp
+2
-2
cmake/googletest.cmake
cmake/googletest.cmake
+1
-0
docs/.sphinx/requirements.in
docs/.sphinx/requirements.in
+1
-1
docs/.sphinx/requirements.txt
docs/.sphinx/requirements.txt
+13
-17
include/ck/ck.hpp
include/ck/ck.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
...ation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
+9
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
...ration/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
+7
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
...or_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp
...ary/tensor_operation_instance/gpu/normalization_swish.hpp
+12
-0
library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt
...ensor_operation_instance/gpu/normalization/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
...ation/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
+24
-0
library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp
...tance/gpu/normalization/normalization_instance_common.hpp
+26
-0
No files found.
.github/dependabot.yml
0 → 100644
View file @
a5abe1ad
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version
:
2
updates
:
-
package-ecosystem
:
"
pip"
# See documentation for possible values
directory
:
"
/"
# Location of package manifests
open-pull-requests-limit
:
10
schedule
:
interval
:
"
daily"
client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
View file @
a5abe1ad
...
...
@@ -73,7 +73,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
N
*
Hi
*
Wi
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
bias
(
sizeof
(
BiasDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
...
...
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
View file @
a5abe1ad
...
...
@@ -76,7 +76,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
N
*
Hi
*
Wi
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
bias
(
sizeof
(
BiasDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
...
...
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
View file @
a5abe1ad
...
...
@@ -69,7 +69,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
N
*
Hi
*
Wi
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
K
);
using
DeviceOp
=
...
...
client_example/18_groupnorm/groupnorm_swish.cpp
View file @
a5abe1ad
...
...
@@ -13,8 +13,8 @@
#include "ck/library/tensor_operation_instance/gpu/normalization_swish.hpp"
using
XDataType
=
ck
::
half_t
;
using
GammaDataType
=
ck
::
half_
t
;
using
BetaDataType
=
ck
::
half_
t
;
using
GammaDataType
=
floa
t
;
using
BetaDataType
=
floa
t
;
using
YDataType
=
ck
::
half_t
;
using
ComputeDataType
=
float
;
using
Swish
=
ck
::
tensor_operation
::
element_wise
::
Swish
;
...
...
cmake/googletest.cmake
View file @
a5abe1ad
...
...
@@ -21,6 +21,7 @@ list(APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-comma
-Wno-old-style-cast
-Wno-deprecated
-Wno-unsafe-buffer-usage
)
message
(
STATUS
"Suppressing googltest warnings with flags:
${
GTEST_CMAKE_CXX_FLAGS
}
"
)
...
...
docs/.sphinx/requirements.in
View file @
a5abe1ad
git+https://github.com/RadeonOpenCompute/
rocm-docs-core
.git
rocm-docs-core
==0.2.0
sphinxcontrib-bibtex==2.5.0
docs/.sphinx/requirements.txt
View file @
a5abe1ad
...
...
@@ -2,9 +2,9 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile requirements.in
# pip-compile
.sphinx/
requirements.in
#
accessible-pygments==0.0.
4
accessible-pygments==0.0.
3
# via pydata-sphinx-theme
alabaster==0.7.13
# via sphinx
...
...
@@ -20,7 +20,7 @@ babel==2.12.1
# sphinx
backcall==0.2.0
# via ipython
beautifulsoup4==4.1
2.0
beautifulsoup4==4.1
1.2
# via pydata-sphinx-theme
breathe==4.34.0
# via rocm-docs-core
...
...
@@ -34,7 +34,7 @@ click==8.1.3
# via
# jupyter-cache
# sphinx-external-toc
comm==0.1.
3
comm==0.1.
2
# via ipykernel
debugpy==1.6.6
# via ipykernel
...
...
@@ -65,13 +65,11 @@ idna==3.4
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==6.
1
.0
importlib-metadata==6.
0
.0
# via
# jupyter-cache
# myst-nb
importlib-resources==5.10.4
# via rocm-docs-core
ipykernel==6.22.0
ipykernel==6.21.3
# via myst-nb
ipython==8.11.0
# via
...
...
@@ -87,7 +85,7 @@ jsonschema==4.17.3
# via nbformat
jupyter-cache==0.5.0
# via myst-nb
jupyter-client==8.
1.0
jupyter-client==8.
0.3
# via
# ipykernel
# nbclient
...
...
@@ -124,7 +122,7 @@ nbclient==0.5.13
# via
# jupyter-cache
# myst-nb
nbformat==5.
8.0
nbformat==5.
7.3
# via
# jupyter-cache
# myst-nb
...
...
@@ -187,7 +185,7 @@ pyyaml==6.0
# myst-parser
# pybtex
# sphinx-external-toc
pyzmq==25.0.
2
pyzmq==25.0.
1
# via
# ipykernel
# jupyter-client
...
...
@@ -195,8 +193,8 @@ requests==2.28.2
# via
# pygithub
# sphinx
rocm-docs-core
@ git+https://github.com/RadeonOpenCompute/rocm-docs-core.git
# via -r requirements.in
rocm-docs-core
==0.2.0
# via -r
.sphinx/
requirements.in
six==1.16.0
# via
# asttokens
...
...
@@ -235,9 +233,7 @@ sphinx-notfound-page==0.8.3
sphinxcontrib-applehelp==1.0.4
# via sphinx
sphinxcontrib-bibtex==2.5.0
# via
# -r requirements.in
# rocm-docs-core
# via -r .sphinx/requirements.in
sphinxcontrib-devhelp==1.0.2
# via sphinx
sphinxcontrib-htmlhelp==2.0.1
...
...
@@ -248,7 +244,7 @@ sphinxcontrib-qthelp==1.0.3
# via sphinx
sphinxcontrib-serializinghtml==1.1.5
# via sphinx
sqlalchemy==1.4.4
7
sqlalchemy==1.4.4
6
# via jupyter-cache
stack-data==0.6.2
# via ipython
...
...
include/ck/ck.hpp
View file @
a5abe1ad
...
...
@@ -168,6 +168,11 @@
// flag to enable (1) or disable (0) the debugging output in some kernels
#define DEBUG_LOG 0
// denorm test fix, required to work around the issue
#ifndef CK_WORKAROUND_DENORM_FIX
#define CK_WORKAROUND_DENORM_FIX 0
#endif
namespace
ck
{
enum
struct
InMemoryDataOperationEnum
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
View file @
a5abe1ad
...
...
@@ -505,6 +505,15 @@ struct GridwiseGemmMultipleD_k0mk1_k0nk1_mn_wmma_cshuffle
}
// TODO: also check validity of all components (blockwise-copy, threadwise-copy, etc)
constexpr
long_index_t
TwoGB
=
(
long_index_t
{
1
}
<<
31
);
if
(
!
(
a_grid_desc_k0_m_k1
.
GetElementSpaceSize
()
*
sizeof
(
ADataType
)
<=
TwoGB
&&
b_grid_desc_k0_n_k1
.
GetElementSpaceSize
()
*
sizeof
(
BDataType
)
<=
TwoGB
&&
e_grid_desc_m_n
.
GetElementSpaceSize
()
*
sizeof
(
EDataType
)
<=
TwoGB
))
{
return
false
;
}
return
true
;
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
View file @
a5abe1ad
...
...
@@ -96,7 +96,7 @@ struct GridwiseGemmMultipleD_xdl_cshuffle
// we convert fp16->fp32->bf16 and execute bf16 mfma instruction
// when mfma is fixed, remove this section and update
// ABDataTypeAdjusted -> ABDataType throughout this file
#if defined(__gfx90a__)
#if
CK_WORKAROUND_DENORM_FIX &&
defined(__gfx90a__)
using
ABDataTypeAdjusted
=
conditional_t
<
is_same_v
<
ABDataType
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
ABDataType
>
;
#else
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
View file @
a5abe1ad
...
...
@@ -264,6 +264,13 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
}
// TODO: also check validity of all components (blockwise-copy, threadwise-copy, etc)
constexpr
long_index_t
TwoGB
=
(
long_index_t
{
1
}
<<
31
);
if
(
!
(
a_grid_desc_k0_m_k1
.
GetElementSpaceSize
()
*
sizeof
(
FloatA
)
<=
TwoGB
&&
b_grid_desc_k0_n_k1
.
GetElementSpaceSize
()
*
sizeof
(
FloatB
)
<=
TwoGB
))
{
return
false
;
}
return
true
;
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
View file @
a5abe1ad
...
...
@@ -265,7 +265,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
// we convert fp16->fp32->bf16 and execute bf16 mfma instruction
// when mfma is fixed, remove this section and update
// FloatABAdjusted -> FloatAB throughout this file
#if defined(__gfx90a__)
#if
CK_WORKAROUND_DENORM_FIX &&
defined(__gfx90a__)
using
FloatABAdjusted
=
conditional_t
<
is_same_v
<
FloatAB
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
FloatAB
>
;
#else
using
FloatABAdjusted
=
FloatAB
;
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
View file @
a5abe1ad
...
...
@@ -135,7 +135,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
// we convert fp16->fp32->bf16 and execute bf16 mfma instruction
// when mfma is fixed, remove this section and update
// FloatABAdjusted -> FloatAB throughout this file
#if defined(__gfx90a__)
#if
CK_WORKAROUND_DENORM_FIX &&
defined(__gfx90a__)
using
FloatABAdjusted
=
conditional_t
<
is_same_v
<
FloatAB
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
FloatAB
>
;
#else
using
FloatABAdjusted
=
FloatAB
;
...
...
library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp
View file @
a5abe1ad
...
...
@@ -25,6 +25,10 @@ void add_device_normalization_rank_5_3_swish_f16_instances(
void
add_device_normalization_rank_5_3_swish_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F32
,
F32
,
F32
,
F32
,
F32
,
Swish
,
5
,
3
>>>&
);
// [x, gamma, beta, y] = [f16, f32, f32, f16]
void
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F16
,
F32
,
F32
,
F32
,
F16
,
Swish
,
5
,
3
>>>&
);
template
<
typename
XDataType
,
typename
GammaDataType
,
typename
BetaDataType
,
...
...
@@ -70,6 +74,14 @@ struct DeviceOperationInstanceFactory<
add_device_normalization_rank_5_3_swish_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
XDataType
,
F16
>
&&
is_same_v
<
GammaDataType
,
F32
>
&&
is_same_v
<
BetaDataType
,
F32
>
&&
is_same_v
<
YDataType
,
F16
>
)
{
if
constexpr
(
Rank
==
5
&&
NumReduceDim
==
3
)
{
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
op_ptrs
);
}
}
return
op_ptrs
;
}
...
...
library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt
View file @
a5abe1ad
...
...
@@ -7,4 +7,5 @@ add_instance_library(device_normalization_instance
device_groupnorm_f32_instance.cpp
device_groupnorm_swish_f16_instance.cpp
device_groupnorm_swish_f32_instance.cpp
device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
)
library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
0 → 100644
View file @
a5abe1ad
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
using
Swish
=
ck
::
tensor_operation
::
element_wise
::
Swish
;
// Registers the Swish normalization instances for Rank = 5 and NumReduceDim = 3
// with the mixed-precision type list DeviceNormalization<F16, F32, F32, F32, F16, ...>
// (x = f16, gamma = f32, beta = f32, compute = f32, y = f16).
//
// `instances` is the caller-owned vector of unique_ptr<DeviceNormalization<...>>.
// It is forwarded, together with a default-constructed
// device_normalization_f16_f32_f32_f16_instances<Swish, 5, 3> tuple, to
// add_device_operation_instances.
// NOTE(review): presumably that helper appends one instance per tuple element
// to `instances` -- confirm against its definition, which is not visible here.
void
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceNormalization
<
F16
,
F32
,
F32
,
F32
,
F16
,
Swish
,
5
,
3
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_normalization_f16_f32_f32_f16_instances
<
Swish
,
5
,
3
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp
View file @
a5abe1ad
...
...
@@ -69,6 +69,32 @@ using device_normalization_f32_instances = std::tuple<
// clang-format on
>
;
template
<
typename
OutElementwise
,
index_t
Rank
,
index_t
Reduce
>
using
device_normalization_f16_f32_f32_f16_instances
=
std
::
tuple
<
// clang-format off
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
2
,
1
,
2
,
1
,
2
,
1
,
2
,
2
>
,
// irregular size
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
128
,
1
,
128
,
1
,
32
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
2
,
16
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
256
,
1
,
256
,
1
,
32
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
512
,
1
,
512
,
2
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
4
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
,
DeviceNormalizationImpl
<
F16
,
F32
,
F32
,
F32
,
F16
,
OutElementwise
,
Rank
,
Reduce
,
1024
,
1
,
1024
,
1
,
8
,
1
,
4
,
1
,
4
,
1
,
4
,
4
>
// clang-format on
>
;
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment