Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a92f4ea8
Commit
a92f4ea8
authored
Apr 13, 2023
by
rocking
Browse files
Revise layout of group conv quantization instance
parent
89abcce5
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
308 additions
and
122 deletions
+308
-122
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
...pu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
+2
-2
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
..._conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
+42
-15
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
...ce_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
+42
-15
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp
...uantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp
+5
-2
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp
...evice_conv2d_dl_perchannel_quantization_int8_instance.cpp
+28
-10
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp
.../device_conv2d_dl_perlayer_quantization_int8_instance.cpp
+28
-10
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
...conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
+42
-15
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
...e_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
+42
-15
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp
...antization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp
+21
-18
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp
...vice_conv2d_xdl_perchannel_quantization_int8_instance.cpp
+28
-10
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp
...device_conv2d_xdl_perlayer_quantization_int8_instance.cpp
+28
-10
No files found.
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
View file @
a92f4ea8
...
@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>;
...
@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
G
NHWC
=
ck
::
tensor_layout
::
convolution
::
G
NHWC
;
using
NHW
G
C
=
ck
::
tensor_layout
::
convolution
::
NHW
G
C
;
using
GKYXC
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
GKYXC
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
G
NHWK
=
ck
::
tensor_layout
::
convolution
::
G
NHWK
;
using
NHW
G
K
=
ck
::
tensor_layout
::
convolution
::
NHW
G
K
;
using
GK
=
ck
::
tensor_layout
::
convolution
::
G_K
;
using
GK
=
ck
::
tensor_layout
::
convolution
::
G_K
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
Relu
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
using
Relu
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_dl_bias_perchannel_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
...
@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
{
{
// dl
// dl
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
...
@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
void
add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
...
@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
{
{
// dl
// dl
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
...
@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
void
add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
...
@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
{
{
// dl
// dl
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_dl_bias_perlayer_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp
>>>&
instances
)
Add_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
void
add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
...
@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp
>>>&
instances
)
Add_Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
...
@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
void
add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances
(
void
add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
...
@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp
View file @
a92f4ea8
...
@@ -12,7 +12,10 @@ namespace device {
...
@@ -12,7 +12,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
// clang-format off
// clang-format off
template
<
typename
DsLayout
,
template
<
typename
InLayout
,
typename
WeiLayout
,
typename
DsLayout
,
typename
OutLayout
,
typename
DsDatatype
,
typename
DsDatatype
,
typename
OutElementOp
,
typename
OutElementOp
,
ConvolutionForwardSpecialization
ConvSpec
,
ConvolutionForwardSpecialization
ConvSpec
,
...
@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances =
...
@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances =
// ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
// ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK
<
NDimSpatial
,
int8_t
,
int8_t
,
DsDatatype
,
int8_t
,
int32_t
,
GNHWC
,
GKYXC
,
DsLayout
,
GNHWK
,
PassThrough
,
PassThrough
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
128
,
128
,
16
,
4
,
4
,
4
,
1
,
S
<
8
,
2
>
,
S
<
8
,
2
>
,
S
<
8
,
1
,
1
,
4
>
,
S
<
2
,
1
,
128
,
1
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
4
,
1
,
1
,
4
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
1
,
1
,
4
>
,
S
<
8
,
1
,
1
,
4
>
,
S
<
2
,
1
,
128
,
1
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
4
,
1
,
1
,
4
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
1
,
1
,
4
>
,
S
<
0
,
1
,
2
,
3
,
4
,
5
>
,
5
,
DstScalarPerVector
>
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK
<
NDimSpatial
,
int8_t
,
int8_t
,
DsDatatype
,
int8_t
,
int32_t
,
InLayout
,
WeiLayout
,
DsLayout
,
OutLayout
,
PassThrough
,
PassThrough
,
OutElementOp
,
ConvSpec
,
GemmSpec
,
256
,
128
,
128
,
16
,
4
,
4
,
4
,
1
,
S
<
8
,
2
>
,
S
<
8
,
2
>
,
S
<
8
,
1
,
1
,
4
>
,
S
<
2
,
1
,
128
,
1
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
4
,
1
,
1
,
4
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
1
,
1
,
4
>
,
S
<
8
,
1
,
1
,
4
>
,
S
<
2
,
1
,
128
,
1
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
4
,
1
,
1
,
4
>
,
S
<
1
,
2
,
0
,
3
>
,
S
<
1
,
1
,
1
,
4
>
,
S
<
0
,
1
,
2
,
3
,
4
,
5
>
,
5
,
DstScalarPerVector
>
>
;
>
;
// clang-format on
// clang-format on
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_dl_perchannel_quantization_int8_instances
(
void
add_device_conv2d_dl_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
F32_Tuple
,
F32_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
Mul2_Clamp
>>>&
instances
)
Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
void
add_device_conv2d_dl_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_dl_relu_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
F32_Tuple
,
F32_Tuple
,
...
@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
...
@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp
>>>&
instances
)
Relu_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_dl_perlayer_quantization_int8_instances
(
void
add_device_conv2d_dl_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
Empty_Tuple
,
Empty_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
Empty_Tuple
,
Empty_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
Mul_Clamp
>>>&
instances
)
Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
void
add_device_conv2d_dl_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_dl_relu_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
Empty_Tuple
,
Empty_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
Empty_Tuple
,
Empty_Tuple
,
...
@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
...
@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
Relu_Mul_Clamp
>>>&
instances
)
Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
4
>
{});
4
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_dl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
Add_Mul2_Clamp
>>>&
instances
)
Add_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
void
add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
Add_Relu_Mul2_Clamp
>>>&
instances
)
Add_Relu_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
...
@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
void
add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_GK_Tuple
,
GK_GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
I32_F32_Tuple
,
...
@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
...
@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
Add_Mul2_TanH_Mul_Clamp
>>>&
instances
)
Add_Mul2_TanH_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_GK_Tuple
,
NHWGK
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp
>>>&
instances
)
Add_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
Add_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
void
add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
...
@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp
>>>&
instances
)
Add_Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
...
@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
void
add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances
(
void
add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
I32_Tuple
,
I32_Tuple
,
...
@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
...
@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
I32_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp
View file @
a92f4ea8
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_xdl_perchannel_quantization_int8_instances
(
void
add_device_conv2d_xdl_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
F32_Tuple
,
F32_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
Mul2_Clamp
>>>&
instances
)
Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
void
add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
GK_Tuple
,
GK_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
F32_Tuple
,
F32_Tuple
,
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp
>>>&
instances
)
Relu_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
8
>
{});
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
GK_Tuple
,
NHWGK
,
F32_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp
View file @
a92f4ea8
...
@@ -9,10 +9,10 @@ namespace device {
...
@@ -9,10 +9,10 @@ namespace device {
namespace
instance
{
namespace
instance
{
void
add_device_conv2d_xdl_perlayer_quantization_int8_instances
(
void
add_device_conv2d_xdl_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
Empty_Tuple
,
Empty_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
Empty_Tuple
,
Empty_Tuple
,
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
...
@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
Mul_Clamp
>>>&
instances
)
Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
16
>
{});
16
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
16
>
{});
16
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
...
@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
void
add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
G
NHWC
,
NHW
G
C
,
GKYXC
,
GKYXC
,
Empty_Tuple
,
Empty_Tuple
,
G
NHWK
,
NHW
G
K
,
int8_t
,
int8_t
,
int8_t
,
int8_t
,
Empty_Tuple
,
Empty_Tuple
,
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances(
...
@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances(
Relu_Mul_Clamp
>>>&
instances
)
Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwdDefault
,
ConvFwdDefault
,
16
>
{});
16
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwd1x1P0
,
ConvFwd1x1P0
,
16
>
{});
16
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
NHWGC
,
GKYXC
,
Empty_Tuple
,
NHWGK
,
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
Relu_Mul_Clamp
,
ConvFwd1x1S1P0
,
ConvFwd1x1S1P0
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment