"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "c2f1737a95f668f86117e3f26454eddc54c9d9e2"
Commit a92f4ea8 authored by rocking
Browse files

Revise layout of group conv quantization instance

parent 89abcce5
...@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>; ...@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>;
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
using GNHWC = ck::tensor_layout::convolution::GNHWC; using NHWGC = ck::tensor_layout::convolution::NHWGC;
using GKYXC = ck::tensor_layout::convolution::GKYXC; using GKYXC = ck::tensor_layout::convolution::GKYXC;
using GNHWK = ck::tensor_layout::convolution::GNHWK; using NHWGK = ck::tensor_layout::convolution::NHWGK;
using GK = ck::tensor_layout::convolution::G_K; using GK = ck::tensor_layout::convolution::G_K;
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using Relu = ck::tensor_operation::element_wise::Relu; using Relu = ck::tensor_operation::element_wise::Relu;
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( ...@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( ...@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( ...@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances( ...@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp>>>& instances) Add_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( ...@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp>>>& instances) Add_Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances( ...@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp>>>& instances) Add_Mul_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -12,7 +12,10 @@ namespace device { ...@@ -12,7 +12,10 @@ namespace device {
namespace instance { namespace instance {
// clang-format off // clang-format off
template <typename DsLayout, template <typename InLayout,
typename WeiLayout,
typename DsLayout,
typename OutLayout,
typename DsDatatype, typename DsDatatype,
typename OutElementOp, typename OutElementOp,
ConvolutionForwardSpecialization ConvSpec, ConvolutionForwardSpecialization ConvSpec,
...@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances = ...@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances =
// ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Specialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector| // ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Specialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | | // ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | // ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< NDimSpatial, int8_t, int8_t, DsDatatype, int8_t, int32_t, GNHWC, GKYXC, DsLayout, GNHWK, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, DstScalarPerVector> DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< NDimSpatial, int8_t, int8_t, DsDatatype, int8_t, int32_t, InLayout, WeiLayout, DsLayout, OutLayout, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, DstScalarPerVector>
>; >;
// clang-format on // clang-format on
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_perchannel_quantization_int8_instances( void add_device_conv2d_dl_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
Mul2_Clamp>>>& instances) Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances( void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp>>>& instances) Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_perlayer_quantization_int8_instances( void add_device_conv2d_dl_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
Mul_Clamp>>>& instances) Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances( void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
Relu_Mul_Clamp>>>& instances) Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
Add_Mul2_Clamp>>>& instances) Add_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
Add_Relu_Mul2_Clamp>>>& instances) Add_Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( ...@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances( ...@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
Add_Mul2_TanH_Mul_Clamp>>>& instances) Add_Mul2_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp>>>& instances) Add_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( ...@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp>>>& instances) Add_Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances( ...@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp>>>& instances) Add_Mul_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
Mul2_Clamp>>>& instances) Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp>>>& instances) Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
Mul_Clamp>>>& instances) Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
16>{}); 16>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
16>{}); 16>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances(
Relu_Mul_Clamp>>>& instances) Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
16>{}); 16>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
16>{}); 16>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<Empty_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment