"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "ab598d80f309d554ee7eae07b7fb0497e817b0f4"
Unverified Commit 3eecbfb6 authored by rocking's avatar rocking Committed by GitHub
Browse files

Revise layout of group convolution (#675)

* [What] Remove pure conv int8 instance
[Why] We will never use pure int8 conv in AI, use int8 quantization instead

* Change layout

* Share the kernel parameter

* Support more type of NHWGC for group conv

* Revise client example of conv 2d, use NHWGC layout

* Add instance to cmake

* Revise layout of group conv quantization instance

* Revise layout of external api of group conv quantization

* Revise layout of group conv quantization client example

* Fix clang format

* Add comment to describe meaning of each parameter
parent 903cd19c
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Data types for the int8 DL grouped-conv instances: int8 in/weight/out with
// int32 accumulation (pure int8 conv; no quantization scales fused here).
using InDataType = int8_t;
using WeiDataType = int8_t;
using AccDataType = int32_t;
using OutDataType = int8_t;
// No extra "D" tensors are fused into the output for these instances.
using Empty_Tuple = ck::Tuple<>;
// Shorthand for compile-time integer sequences used in the tuning tables below.
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
// All elementwise ops are identity (PassThrough) — no fused activation.
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
// Tensor layouts: grouped NHWC input, grouped KYXC weights, grouped NHWK output.
using InLayout = ck::tensor_layout::convolution::GNHWC;
using WeiLayout = ck::tensor_layout::convolution::GKYXC;
using OutLayout = ck::tensor_layout::convolution::GNHWK;
// Convolution-forward specializations covered by the three instance lists below.
static constexpr auto ConvSpec =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
static constexpr auto Filter1x1Pad0 =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Pad0;
static constexpr auto Filter1x1Stride1Pad0 =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
// Pad M/N/K in the implied GEMM so arbitrary problem sizes are supported.
// NOTE(review): name has a typo ("Pading") but is referenced by the instance
// lists below, so it is left unchanged here.
static constexpr auto GemmPadingSpec = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
// int8 DL grouped-conv2d-forward instances, Default specialization (any filter
// size / stride / padding). Tuning parameters follow the column legend below.
using device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_instances = std::tuple<
// clang-format off
// ########################################| NDim| InData| WeiData| MultpleD| OutData| AccData| InLayout| WeiLayout| MultipleD| OutLayout| In| Wei| Out| Convolution| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< 2, InDataType, WeiDataType, Empty_Tuple, OutDataType, AccDataType, InLayout, WeiLayout, Empty_Tuple, OutLayout, InElementOp, WeiElementOp, OutElementOp, ConvSpec, GemmPadingSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>
// clang-format on
>;
// Same tuning configuration as the Default list, but specialized for 1x1
// filters with zero padding (skips some index math inside the kernel).
using device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_Filter1x1Pad0_instances = std::tuple<
// clang-format off
// ########################################| NDim| InData| WeiData| MultpleD| OutData| AccData| InLayout| WeiLayout| MultipleD| OutLayout| In| Wei| Out| Convolution| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< 2, InDataType, WeiDataType, Empty_Tuple, OutDataType, AccDataType, InLayout, WeiLayout, Empty_Tuple, OutLayout, InElementOp, WeiElementOp, OutElementOp, Filter1x1Pad0, GemmPadingSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>
// clang-format on
>;
// Same tuning configuration again, specialized for 1x1 filters with stride 1
// and zero padding (the conv degenerates to a plain GEMM over pixels).
using device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_Filter1x1Stride1Pad0_instances =
std::tuple<
// clang-format off
// ########################################| NDim| InData| WeiData| MultpleD| OutData| AccData| InLayout| WeiLayout| MultipleD| OutLayout| In| Wei| Out| Convolution| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< 2, InDataType, WeiDataType, Empty_Tuple, OutDataType, AccDataType, InLayout, WeiLayout, Empty_Tuple, OutLayout, InElementOp, WeiElementOp, OutElementOp, Filter1x1Stride1Pad0, GemmPadingSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>
// clang-format on
>;
// Appends every int8 DL grouped-conv2d-forward instance defined in this TU to
// `instances`: the Default-specialization list followed by the two 1x1-filter
// specializations. Registration order matches the original implementation.
void add_device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<2,
                                                              InLayout,
                                                              WeiLayout,
                                                              Empty_Tuple,
                                                              OutLayout,
                                                              InDataType,
                                                              WeiDataType,
                                                              Empty_Tuple,
                                                              OutDataType,
                                                              InElementOp,
                                                              WeiElementOp,
                                                              OutElementOp>>>& instances)
{
    // Small helper so each instance list is registered the same way.
    const auto append = [&instances](auto instance_tuple) {
        add_device_operation_instances(instances, instance_tuple);
    };

    append(device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_instances{});
    append(device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_Filter1x1Pad0_instances{});
    append(device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_int8_Filter1x1Stride1Pad0_instances{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp"
#include "device_grouped_conv2d_fwd_common.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// fp16 DL grouped-conv2d-forward instance list. The layouts, fused-D tensors,
// output elementwise op and conv specialization are template parameters so one
// tuning configuration can be shared across layout variants (the commit this
// file belongs to adds NHWGC/NHWGK support alongside GNHWC/GNHWK).
template <typename InLayout,     // input tensor layout
          typename WeiLayout,    // weight tensor layout
          typename DsLayout,     // layouts of the extra "D" tensors fused into the output
          typename OutLayout,    // output tensor layout
          typename DsDatatype,   // data types of the extra "D" tensors
          typename CDEElementOp, // elementwise op combining conv result and D tensors
          ConvolutionForwardSpecialization ConvSpec> // Default / 1x1 specializations
using device_grouped_conv2d_fwd_dl_f16_instances = std::tuple<
// clang-format off
// ########################################| NDim| InData| WeiData| MultpleD| OutData| AccData| InLayout| WeiLayout| MultipleD| OutLayout| In| Wei| Out| Convolution| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< 2, F16, F16, DsDatatype, F16, F32, InLayout, WeiLayout, DsLayout, OutLayout, PassThrough, PassThrough, CDEElementOp, ConvSpec, GemmMNKPadding, 256, 128, 128, 16, 2, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 2>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 2>, S<1, 2, 0, 3>, S<1, 1, 1, 2>, S<8, 1, 1, 2>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 2>, S<1, 2, 0, 3>, S<1, 1, 1, 2>, S<0, 1, 2, 3, 4, 5>, 5, 4>
// clang-format on
>;
// fp32 DL grouped-conv2d-forward instance list; identical structure to the f16
// list above but with F32 data types and scalar (width-1) vector transfers.
// NOTE: a duplicated "// clang-format off" directive was removed here.
template <typename InLayout,     // input tensor layout
          typename WeiLayout,    // weight tensor layout
          typename DsLayout,     // layouts of the extra "D" tensors fused into the output
          typename OutLayout,    // output tensor layout
          typename DsDatatype,   // data types of the extra "D" tensors
          typename CDEElementOp, // elementwise op combining conv result and D tensors
          ConvolutionForwardSpecialization ConvSpec> // Default / 1x1 specializations
using device_grouped_conv2d_fwd_dl_f32_instances = std::tuple<
// clang-format off
// ########################################| NDim| InData| WeiData| MultpleD| OutData| AccData| InLayout| WeiLayout| MultipleD| OutLayout| In| Wei| Out| Convolution| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< 2, F32, F32, DsDatatype, F32, F32, InLayout, WeiLayout, DsLayout, OutLayout, PassThrough, PassThrough, CDEElementOp, ConvSpec, GemmMNKPadding, 256, 128, 128, 16, 1, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 1>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 1>, S<1, 2, 0, 3>, S<1, 1, 1, 1>, S<8, 1, 1, 1>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 1>, S<1, 2, 0, 3>, S<1, 1, 1, 1>, S<0, 1, 2, 3, 4, 5>, 5, 4>
// clang-format on
>;
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "device_grouped_conv2d_fwd_xdl_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
//
// Registers the bf16 XDL grouped-conv2d-forward instances for the
// NHWGC/GKYXC/NHWGK layout combination. All four instance lists share the same
// layout/type parameters and differ only in the conv-forward specialization.
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<2,
                                                              NHWGC,
                                                              GKYXC,
                                                              Empty_Tuple,
                                                              NHWGK,
                                                              BF16,
                                                              BF16,
                                                              Empty_Tuple,
                                                              BF16,
                                                              PassThrough,
                                                              PassThrough,
                                                              PassThrough>>>& instances)
{
    // One named alias per specialization; registration order is unchanged:
    // Default, 1x1+pad0, 1x1+stride1+pad0, odd-channel-count.
    using DefaultInstances = device_grouped_conv2d_fwd_xdl_bf16_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwdDefault>;
    using Filter1x1Pad0Instances = device_grouped_conv2d_fwd_xdl_bf16_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwd1x1P0>;
    using Filter1x1Stride1Pad0Instances = device_grouped_conv2d_fwd_xdl_bf16_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwd1x1S1P0>;
    using OddChannelInstances = device_grouped_conv2d_fwd_xdl_bf16_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwdOddC>;

    add_device_operation_instances(instances, DefaultInstances{});
    add_device_operation_instances(instances, Filter1x1Pad0Instances{});
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
    add_device_operation_instances(instances, OddChannelInstances{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "device_grouped_conv2d_fwd_xdl_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
//
// Registers the fp32 XDL grouped-conv2d-forward instances for the
// NHWGC/GKYXC/NHWGK layout combination. All four instance lists share the same
// layout/type parameters and differ only in the conv-forward specialization.
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<2,
                                                              NHWGC,
                                                              GKYXC,
                                                              Empty_Tuple,
                                                              NHWGK,
                                                              F32,
                                                              F32,
                                                              Empty_Tuple,
                                                              F32,
                                                              PassThrough,
                                                              PassThrough,
                                                              PassThrough>>>& instances)
{
    // One named alias per specialization; registration order is unchanged:
    // Default, 1x1+pad0, 1x1+stride1+pad0, odd-channel-count.
    using DefaultInstances = device_grouped_conv2d_fwd_xdl_f32_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwdDefault>;
    using Filter1x1Pad0Instances = device_grouped_conv2d_fwd_xdl_f32_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwd1x1P0>;
    using Filter1x1Stride1Pad0Instances = device_grouped_conv2d_fwd_xdl_f32_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwd1x1S1P0>;
    using OddChannelInstances = device_grouped_conv2d_fwd_xdl_f32_instances<
        NHWGC, GKYXC, Empty_Tuple, NHWGK, Empty_Tuple, PassThrough, ConvFwdOddC>;

    add_device_operation_instances(instances, DefaultInstances{});
    add_device_operation_instances(instances, Filter1x1Pad0Instances{});
    add_device_operation_instances(instances, Filter1x1Stride1Pad0Instances{});
    add_device_operation_instances(instances, OddChannelInstances{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>; ...@@ -19,9 +19,9 @@ using Empty_Tuple = ck::Tuple<>;
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
using GNHWC = ck::tensor_layout::convolution::GNHWC; using NHWGC = ck::tensor_layout::convolution::NHWGC;
using GKYXC = ck::tensor_layout::convolution::GKYXC; using GKYXC = ck::tensor_layout::convolution::GKYXC;
using GNHWK = ck::tensor_layout::convolution::GNHWK; using NHWGK = ck::tensor_layout::convolution::NHWGK;
using GK = ck::tensor_layout::convolution::G_K; using GK = ck::tensor_layout::convolution::G_K;
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using Relu = ck::tensor_operation::element_wise::Relu; using Relu = ck::tensor_operation::element_wise::Relu;
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( ...@@ -23,19 +23,28 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances( ...@@ -44,10 +53,10 @@ void add_device_conv2d_dl_bias_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( ...@@ -58,19 +67,28 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances( void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances( ...@@ -93,19 +111,28 @@ void add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances(
{ {
// dl // dl
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp>>>& instances) Add_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_bias_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( ...@@ -56,21 +65,30 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp>>>& instances) Add_Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances( void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances( ...@@ -92,21 +110,30 @@ void add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp>>>& instances) Add_Mul_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -12,7 +12,10 @@ namespace device { ...@@ -12,7 +12,10 @@ namespace device {
namespace instance { namespace instance {
// clang-format off // clang-format off
template <typename DsLayout, template <typename InLayout,
typename WeiLayout,
typename DsLayout,
typename OutLayout,
typename DsDatatype, typename DsDatatype,
typename OutElementOp, typename OutElementOp,
ConvolutionForwardSpecialization ConvSpec, ConvolutionForwardSpecialization ConvSpec,
...@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances = ...@@ -23,7 +26,7 @@ using device_grouped_conv2d_dl_int8_instances =
// ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector| // ###########################################| Spatial| Type| Type| Type| Type| Type| | | Layout| | Elementwise| Elementwise| Elementwise| Forward| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | | // ###########################################| | | | | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | // ###########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< NDimSpatial, int8_t, int8_t, DsDatatype, int8_t, int32_t, GNHWC, GKYXC, DsLayout, GNHWK, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, DstScalarPerVector> DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK< NDimSpatial, int8_t, int8_t, DsDatatype, int8_t, int32_t, InLayout, WeiLayout, DsLayout, OutLayout, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, DstScalarPerVector>
>; >;
// clang-format on // clang-format on
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_perchannel_quantization_int8_instances( void add_device_conv2d_dl_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
Mul2_Clamp>>>& instances) Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perchannel_quantization_int8_instances(
void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances( void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp>>>& instances) Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<GK_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_dl_perlayer_quantization_int8_instances( void add_device_conv2d_dl_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
Mul_Clamp>>>& instances) Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Mul_Clamp, Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_dl_perlayer_quantization_int8_instances(
void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances( void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_dl_relu_perlayer_quantization_int8_instances(
Relu_Mul_Clamp>>>& instances) Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
4>{}); 4>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_dl_int8_instances<Empty_Tuple, device_grouped_conv2d_dl_int8_instances<NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
Empty_Tuple, Empty_Tuple,
Relu_Mul_Clamp, Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
Add_Mul2_Clamp>>>& instances) Add_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_Clamp, Add_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
Add_Relu_Mul2_Clamp>>>& instances) Add_Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Relu_Mul2_Clamp, Add_Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances( ...@@ -77,10 +95,10 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_GK_Tuple, GK_GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_F32_Tuple, I32_F32_Tuple,
...@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances( ...@@ -90,19 +108,28 @@ void add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances(
Add_Mul2_TanH_Mul_Clamp>>>& instances) Add_Mul2_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_GK_Tuple,
NHWGK,
I32_F32_Tuple, I32_F32_Tuple,
Add_Mul2_TanH_Mul_Clamp, Add_Mul2_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
Add_Mul_Clamp>>>& instances) Add_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_Clamp, Add_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances(
void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( ...@@ -56,21 +65,30 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp>>>& instances) Add_Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Relu_Mul_Clamp, Add_Relu_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances( ...@@ -79,10 +97,10 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
I32_Tuple, I32_Tuple,
...@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances( ...@@ -92,21 +110,30 @@ void add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances(
Add_Mul_TanH_Mul_Clamp>>>& instances) Add_Mul_TanH_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
I32_Tuple, I32_Tuple,
Add_Mul_TanH_Mul_Clamp, Add_Mul_TanH_Mul_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
...@@ -9,10 +9,10 @@ namespace device { ...@@ -9,10 +9,10 @@ namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances( ...@@ -22,19 +22,28 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
Mul2_Clamp>>>& instances) Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Mul2_Clamp, Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances( ...@@ -43,10 +52,10 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
GNHWC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
GNHWK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances( ...@@ -56,19 +65,28 @@ void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
Relu_Mul2_Clamp>>>& instances) Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwdDefault, ConvFwdDefault,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1P0, ConvFwd1x1P0,
8>{}); 8>{});
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<GK_Tuple, device_grouped_conv2d_xdl_int8_instances<NHWGC,
GKYXC,
GK_Tuple,
NHWGK,
F32_Tuple, F32_Tuple,
Relu_Mul2_Clamp, Relu_Mul2_Clamp,
ConvFwd1x1S1P0, ConvFwd1x1S1P0,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment