Commit 5641b889 authored by Bartlomiej Kocot's avatar Bartlomiej Kocot
Browse files

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/composable_kernel...

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/composable_kernel into barkocot/fix-Filter1x1Pad0-check
parents 4ecef37f f2398f61
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#pragma once #pragma once
#include "conv2d_quantization_common.hpp" #include "conv2d_quantization_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -26,19 +26,19 @@ using device_grouped_conv2d_xdl_int8_instances = ...@@ -26,19 +26,19 @@ using device_grouped_conv2d_xdl_int8_instances =
//########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector| //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
//########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl| //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | Stage| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl|
//########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 256, 128, 64, 16, 16, 32, 32, 4, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 256, 128, 64, 16, 16, 32, 32, 4, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 256, 64, 16, 16, 32, 32, 2, 4, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 256, 64, 16, 16, 32, 32, 2, 4, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 128, 64, 16, 16, 32, 32, 4, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 128, 64, 16, 16, 32, 32, 4, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 128, 64, 16, 16, 32, 32, 2, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 128, 64, 16, 16, 32, 32, 2, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 64, 64, 16, 16, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 2>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 64, 64, 16, 16, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 2>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 64, 128, 64, 16, 16, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 64, 128, 64, 16, 16, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 64, 64, 64, 16, 16, 32, 32, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 64, 64, 64, 16, 16, 32, 32, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 64, 64, 16, 16, 32, 32, 2, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 128, 64, 64, 16, 16, 32, 32, 2, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 64, 128, 64, 16, 16, 32, 32, 1, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 256, 64, 128, 64, 16, 16, 32, 32, 1, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 32, 64, 16, 16, 32, 32, 2, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 2>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 128, 32, 64, 16, 16, 32, 32, 2, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 64, 1, 2>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 32, 128, 64, 16, 16, 32, 32, 1, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 128, 32, 128, 64, 16, 16, 32, 32, 1, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 4>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 64, 32, 64, 16, 16, 32, 32, 2, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector>, DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 64, 32, 64, 16, 16, 32, 32, 2, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector>,
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 32, 64, 64, 16, 16, 32, 32, 1, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector> DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<NDimSpatial, ALayout, BLayout, DsLayout, ELayout, int8_t, int8_t, int32_t, int32_t, DsDatatype, int8_t, PassThrough, PassThrough, OutElementOp, ConvSpec, GemmSpec, 1, 64, 32, 64, 64, 16, 16, 32, 32, 1, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 1, 1, 1, S<1, 32, 1, 2>, DstScalarPerVector>
>; >;
// clang-format on // clang-format on
......
...@@ -8,18 +8,18 @@ namespace tensor_operation { ...@@ -8,18 +8,18 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<NDimSpatial,
NHWGC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
NHWGK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
int8_t, int8_t,
PassThrough, PassThrough,
PassThrough, PassThrough,
Mul2_Clamp>>>& instances) Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<NHWGC, device_grouped_conv2d_xdl_int8_instances<NHWGC,
...@@ -51,18 +51,18 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances( ...@@ -51,18 +51,18 @@ void add_device_conv2d_xdl_perchannel_quantization_int8_instances(
} }
void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances( void add_device_conv2d_xdl_relu_perchannel_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<NDimSpatial,
NHWGC, NHWGC,
GKYXC, GKYXC,
GK_Tuple, GK_Tuple,
NHWGK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
F32_Tuple, F32_Tuple,
int8_t, int8_t,
PassThrough, PassThrough,
PassThrough, PassThrough,
Relu_Mul2_Clamp>>>& instances) Relu_Mul2_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<NHWGC, device_grouped_conv2d_xdl_int8_instances<NHWGC,
......
...@@ -8,18 +8,18 @@ namespace tensor_operation { ...@@ -8,18 +8,18 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
void add_device_conv2d_xdl_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<NDimSpatial,
NHWGC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
NHWGK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
int8_t, int8_t,
PassThrough, PassThrough,
PassThrough, PassThrough,
Mul_Clamp>>>& instances) Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<NHWGC, device_grouped_conv2d_xdl_int8_instances<NHWGC,
...@@ -51,18 +51,18 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances( ...@@ -51,18 +51,18 @@ void add_device_conv2d_xdl_perlayer_quantization_int8_instances(
} }
void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances( void add_device_conv2d_xdl_relu_perlayer_quantization_int8_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial, std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<NDimSpatial,
NHWGC, NHWGC,
GKYXC, GKYXC,
Empty_Tuple, Empty_Tuple,
NHWGK, NHWGK,
int8_t, int8_t,
int8_t, int8_t,
Empty_Tuple, Empty_Tuple,
int8_t, int8_t,
PassThrough, PassThrough,
PassThrough, PassThrough,
Relu_Mul_Clamp>>>& instances) Relu_Mul_Clamp>>>& instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
device_grouped_conv2d_xdl_int8_instances<NHWGC, device_grouped_conv2d_xdl_int8_instances<NHWGC,
......
...@@ -76,7 +76,7 @@ int profile_gemm_impl(int do_verification, ...@@ -76,7 +76,7 @@ int profile_gemm_impl(int do_verification,
break; break;
default: default:
a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 0.1}); a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 0.1});
b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.05, 0.05}); b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.01, 0.01});
} }
using AElementOp = ck::tensor_operation::element_wise::PassThrough; using AElementOp = ck::tensor_operation::element_wise::PassThrough;
......
...@@ -198,18 +198,18 @@ bool profile_grouped_conv_fwd_impl(int do_verification, ...@@ -198,18 +198,18 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
} }
}; };
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD<NDimSpatial, using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
InLayout, InLayout,
WeiLayout, WeiLayout,
ck::Tuple<>, ck::Tuple<>,
OutLayout, OutLayout,
InDataType, InDataType,
WeiDataType, WeiDataType,
ck::Tuple<>, ck::Tuple<>,
OutDataType, OutDataType,
InElementOp, InElementOp,
WeiElementOp, WeiElementOp,
OutElementOp>; OutElementOp>;
// get device op instances // get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory< const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
......
add_gtest_executable(test_grouped_convnd_fwd test_grouped_convnd_fwd.cpp) add_gtest_executable(test_grouped_convnd_fwd test_grouped_convnd_fwd.cpp)
target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance) target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp)
target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility)
add_gtest_executable(test_grouped_convnd_fwd_multi_d_interface_compatibility test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp)
target_link_libraries(test_grouped_convnd_fwd_multi_d_interface_compatibility PRIVATE utility device_grouped_conv3d_fwd_instance)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include <gtest/gtest.h>
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using ScaleAdd = ck::tensor_operation::element_wise::ScaleAdd;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
template <typename DataType,
typename InDataTypes,
typename WeiDataTypes,
typename InElementOp,
typename WeiElementOp>
class TestGroupedConvndFwdMultiABInterfaceBase : public ::testing::Test
{
protected:
static constexpr ck::index_t NDimSpatial = 3;
static constexpr ck::index_t NumAs = 2;
static constexpr ck::index_t NumBs = 2;
static constexpr auto ConvSpec =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
using InLayout = ck::tensor_layout::convolution::GNDHWC;
using WeiLayout = ck::tensor_layout::convolution::GKZYXC;
using OutLayout = ck::tensor_layout::convolution::GNDHWK;
using OutElementOp = PassThrough;
using DeviceGroupedConvNDMultiABFwdInstance =
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<
NDimSpatial,
InLayout,
WeiLayout,
ck::Tuple<>,
OutLayout,
InDataTypes,
WeiDataTypes,
DataType,
DataType,
ck::Tuple<>,
DataType,
InElementOp,
WeiElementOp,
OutElementOp,
ConvSpec, // ConvForwardSpecialization
GemmSpec, // GemmSpecialization
1, //
256, // BlockSize
128, // MPerBlock
256, // NPerBlock
32, // KPerBlock
8, // AK1
8, // BK1
32, // MPerXdl
32, // NPerXdl
2, // MXdlPerWave
4, // NXdlPerWave
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_AK0_M_AK1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim
8, // ABlockTransferSrcScalarPerVector
8, // ABlockTransferDstScalarPerVector_AK1
1, // ABlockLdsExtraM
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_BK0_N_BK1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
8, // BBlockTransferDstScalarPerVector_BK1
1, // BBlockLdsExtraN
1,
1,
S<1, 32, 1, 8>,
8>;
const ck::utils::conv::ConvParam conv_param{
3, 1, 16, 16, 8, {3, 3, 3}, {17, 17, 17}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
void SetUp() override
{
if(!ck::is_xdl_supported())
{
GTEST_SKIP();
}
}
template <typename ADataType, typename BDataType>
bool Run(ADataType as, BDataType bs)
{
const auto in_g_n_c_wis_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
conv_param);
const auto wei_g_k_c_xs_desc =
ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
conv_param);
const auto out_g_n_k_wos_desc =
ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
conv_param);
std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_lengths{};
std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_strides{};
std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_lengths{};
std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](const auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
copy(wei_g_k_c_xs_desc.GetLengths(), b_g_k_c_xs_lengths);
copy(wei_g_k_c_xs_desc.GetStrides(), b_g_k_c_xs_strides);
copy(out_g_n_k_wos_desc.GetLengths(), e_g_n_k_wos_lengths);
copy(out_g_n_k_wos_desc.GetStrides(), e_g_n_k_wos_strides);
copy(conv_param.conv_filter_strides_, conv_filter_strides);
copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
copy(conv_param.input_left_pads_, input_left_pads);
copy(conv_param.input_right_pads_, input_right_pads);
std::array<const void*, 0> ds{};
// do Conv
auto conv = DeviceGroupedConvNDMultiABFwdInstance{};
auto invoker = conv.MakeInvoker();
auto argument = conv.MakeArgument(as,
bs,
ds,
nullptr,
a_g_n_c_wis_lengths,
a_g_n_c_wis_strides,
b_g_k_c_xs_lengths,
b_g_k_c_xs_strides,
{},
{},
e_g_n_k_wos_lengths,
e_g_n_k_wos_strides,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads,
InElementOp{},
WeiElementOp{},
OutElementOp{});
return conv.IsSupportedArgument(argument);
}
};
class TestGroupedConvndFwdMultiAInterface
: public TestGroupedConvndFwdMultiABInterfaceBase<float,
ck::Tuple<float, float>,
float,
ScaleAdd,
PassThrough>
{
};
class TestGroupedConvndFwdMultiBInterface
: public TestGroupedConvndFwdMultiABInterfaceBase<float,
float,
ck::Tuple<float, float>,
PassThrough,
ScaleAdd>
{
};
class TestGroupedConvndFwdMultiABInterface
: public TestGroupedConvndFwdMultiABInterfaceBase<float,
ck::Tuple<float, float>,
ck::Tuple<float, float>,
ScaleAdd,
ScaleAdd>
{
};
class TestGroupedConvndFwdInterface
: public TestGroupedConvndFwdMultiABInterfaceBase<float, float, float, PassThrough, PassThrough>
{
};
TEST_F(TestGroupedConvndFwdMultiAInterface, MultiA)
{
std::array<const void*, NumAs> as{nullptr, nullptr};
const void* b = nullptr;
EXPECT_TRUE(this->template Run(as, b));
}
TEST_F(TestGroupedConvndFwdMultiBInterface, MultiB)
{
const void* a = nullptr;
std::array<const void*, NumBs> bs{nullptr, nullptr};
EXPECT_TRUE(this->template Run(a, bs));
}
TEST_F(TestGroupedConvndFwdMultiABInterface, MultiAB)
{
std::array<const void*, NumAs> as{nullptr, nullptr};
std::array<const void*, NumBs> bs{nullptr, nullptr};
EXPECT_TRUE(this->template Run(as, bs));
}
TEST_F(TestGroupedConvndFwdInterface, SingleAB)
{
const void* a = nullptr;
const void* b = nullptr;
EXPECT_TRUE(this->template Run(a, b));
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp"
#include <gtest/gtest.h>
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
class TestGroupedConvndFwdMultiDInterfaceCompatibility : public ::testing::Test
{
protected:
static constexpr ck::index_t NDimSpatial = 3;
using InDataType = float;
using WeiDataType = float;
using OutDataType = float;
using InLayout = ck::tensor_layout::convolution::GNDHWC;
using WeiLayout = ck::tensor_layout::convolution::GKZYXC;
using OutLayout = ck::tensor_layout::convolution::GNDHWK;
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD<NDimSpatial,
InLayout,
WeiLayout,
ck::Tuple<>,
OutLayout,
InDataType,
WeiDataType,
ck::Tuple<>,
OutDataType,
PassThrough,
PassThrough,
PassThrough>;
bool Run()
{
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
return op_ptrs.size() != 0;
}
};
TEST_F(TestGroupedConvndFwdMultiDInterfaceCompatibility, CompatibilityTest)
{
EXPECT_TRUE(this->Run());
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment